From a864de9297206b02dc64399e46ec9598f55ffda2 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 7 Dec 2023 17:05:56 +0000 Subject: [PATCH 001/117] bump version to 0.12.0 --- README.rst | 2 +- lsbi/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index f8fe103..8be0d84 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference ======================================= :lsbi: Linear Simulation Based Inference :Author: Will Handley & David Yallup -:Version: 0.11.2 +:Version: 0.12.0 :Homepage: https://github.com/handley-lab/lsbi :Documentation: http://lsbi.readthedocs.io/ diff --git a/lsbi/_version.py b/lsbi/_version.py index e2bd072..ea370a8 100644 --- a/lsbi/_version.py +++ b/lsbi/_version.py @@ -1 +1 @@ -__version__ = "0.11.2" +__version__ = "0.12.0" From a6cafd4e4ba0b97c0e1eee627eabff1228ef6ff7 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 7 Dec 2023 15:33:28 +0000 Subject: [PATCH 002/117] rvs and logpdf working --- lsbi/stats_1.py | 393 ++++++++++++++++++++++++++++++++++++++++++ tests/test_stats_1.py | 36 ++++ 2 files changed, 429 insertions(+) create mode 100644 lsbi/stats_1.py create mode 100644 tests/test_stats_1.py diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py new file mode 100644 index 0000000..c7a6e8c --- /dev/null +++ b/lsbi/stats_1.py @@ -0,0 +1,393 @@ +"""Extensions to scipy.stats functions.""" +import numpy as np +import scipy.stats +from numpy.linalg import inv +from scipy.special import erf, logsumexp +from scipy.stats._multivariate import multivariate_normal_frozen + +from lsbi.utils import bisect, logdet + + +class multivariate_normal(object): + """Vectorised multivariate normal distribution. + + This extends scipy.stats.multivariate_normal to allow for vectorisation across + the distribution parameters. mean can be an array of shape (..., dim) and cov + can be an array of shape (..., dim, dim) where ... represent arbitrary broadcastable + shapes. + + Implemented with the same style as scipy.stats.multivariate_normal, except that + results are not squeezed. + + Parameters + ---------- + mean : array_like, shape (..., dim) + Mean of each component. + + cov array_like, shape (..., dim, dim) + Covariance matrix of each component. + + shape: tuple, optional, default=() + Shape of the distribution. 
Useful for forcing a broadcast beyond that + inferred by mean and cov shapes + + """ + + def __init__(self, mean, cov): + self.mean = mean + self.cov = cov + self.shape = () + assert self.cov.shape[-2:] == (self.dim, self.dim) + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + self.mean.shape[:-1], self.cov.shape[:-2], self._shape + ) + + @shape.setter + def shape(self, shape): + self._shape = shape + + @property + def dim(self): + """Dimension of the distribution.""" + return self.mean.shape[-1] + + def logpdf(self, x): + """Log of the probability density function.""" + dx = x.reshape(-1, 1, self.dim) - self.mean.reshape(-1, self.dim) + invcov = np.linalg.inv(self.cov).reshape(-1, self.dim, self.dim) + chi2 = np.einsum("xaj,ajk,xak->xa", dx, invcov, dx) + norm = -logdet(2 * np.pi * self.cov) / 2 + logpdf = norm - chi2.reshape((*x.shape[:-1], *self.shape)) / 2 + return logpdf + + def rvs(self, size=1): + """Random variates.""" + size = np.atleast_1d(np.array(size, dtype=int)) + x = np.random.randn(np.prod(size), np.prod(self.shape, dtype=int), self.dim) + cholesky = np.linalg.cholesky(self.cov).reshape(-1, self.dim, self.dim) + t = self.mean.reshape(-1, self.dim) + np.einsum("ajk,xak->xaj", cholesky, x) + return t.reshape(*size, *self.shape, self.dim) + + def marginalise(self, indices): + """Marginalise over indices. + + Parameters + ---------- + indices : array_like + Indices to marginalise. + + Returns + ------- + marginalised distribution: multimultivariate_normal + """ + i = self._bar(indices) + means = self.means[:, i] + covs = self.covs[:, i][:, :, i] + return multimultivariate_normal(means, covs) + + def condition(self, indices, values): + """Condition on indices with values. + + Parameters + ---------- + indices : array_like + Indices to condition over. + values : array_like + Values to condition on. + + Returns + ------- + conditional distribution: multimultivariate_normal + """ + i = self._bar(indices) + k = indices + values = values.reshape(self.means[:, k].shape) + means = self.means[:, i] + np.einsum( + "ija,iab,ib->ij", + self.covs[:, i][:, :, k], + inv(self.covs[:, k][:, :, k]), + (values - self.means[:, k]), + ) + covs = self.covs[:, i][:, :, i] - np.einsum( + "ija,iab,ibk->ijk", + self.covs[:, i][:, :, k], + inv(self.covs[:, k][:, :, k]), + self.covs[:, k][:, :, i], + ) + return multimultivariate_normal(means, covs) + + def _bar(self, indices): + """Return the indices not in the given indices.""" + k = np.ones(self.means.shape[-1], dtype=bool) + k[indices] = False + return k + + def bijector(self, x, inverse=False): + """Bijector between U([0, 1])^d and the distribution. + + - x in [0, 1]^d is the hypercube space. + - theta in R^d is the physical space. + + Computes the transformation from x to theta or theta to x depending on + the value of inverse. + + Parameters + ---------- + x : array_like, shape (..., d) + if inverse: x is theta + else: x is x + inverse : bool, optional, default=False + If True: compute the inverse transformation from physical to + hypercube space. + + Returns + ------- + transformed x or theta: array_like, shape (..., d) + """ + Ls = np.linalg.cholesky(self.covs) + if inverse: + Linvs = inv(Ls) + y = np.einsum("ijk,...ik->...ij", Linvs, x - self.means) + return scipy.stats.norm.cdf(y) + else: + y = scipy.stats.norm.ppf(x) + return self.means + np.einsum("ijk,...ik->...ij", Ls, y) + + def predict(self, A, b=None): + """Predict the mean and covariance of a linear transformation. 
+ + if: x ~ N(mu, Sigma) + then: Ax + b ~ N(A mu + b, A Sigma A^T) + + Parameters + ---------- + A : array_like, shape (k, q, n) + Linear transformation matrix. + b : array_like, shape (k, q), optional + Linear transformation vector. + + Returns + ------- + predicted distribution: mixture_multivariate_normal + """ + if b is None: + b = np.zeros(A.shape[:-1]) + means = np.einsum("kqn,kn->kq", A, self.means) + b + covs = np.einsum("kpn,knm,kqm->kpq", A, self.covs, A) + return multimultivariate_normal(means, covs) + + +class mixture_multivariate_normal(object): + """Mixture of multivariate normal distributions. + + Implemented with the same style as scipy.stats.multivariate_normal + + Parameters + ---------- + means : array_like, shape (n_components, n_features) + Mean of each component. + + covs: array_like, shape (n_components, n_features, n_features) + Covariance matrix of each component. + + logA: array_like, shape (n_components,) + Log of the mixing weights. + """ + + def __init__(self, means, covs, logA): + self.means = np.array([np.atleast_1d(m) for m in means]) + self.covs = np.array([np.atleast_2d(c) for c in covs]) + self.logA = np.atleast_1d(logA) + + def logpdf(self, x, reduce=True, keepdims=False): + """Log of the probability density function.""" + x = self._process_quantiles(x, self.means.shape[-1]) + dx = self.means - x[..., None, :] + invcovs = np.linalg.inv(self.covs) + chi2 = np.einsum("...ij,ijk,...ik->...i", dx, invcovs, dx) + norm = -logdet(2 * np.pi * self.covs) / 2 + logpdf = norm - chi2 / 2 + if reduce: + logA = self.logA - scipy.special.logsumexp(self.logA) + logpdf = np.squeeze(scipy.special.logsumexp(logpdf + logA, axis=-1)) + if not keepdims: + logpdf = np.squeeze(logpdf) + return logpdf + + def rvs(self, size=1): + """Random variates.""" + size = np.atleast_1d(size) + p = np.exp(self.logA - self.logA.max()) + p /= p.sum() + i = np.random.choice(len(p), size, p=p) + x = np.random.randn(*size, self.means.shape[-1]) + choleskys = np.linalg.cholesky(self.covs) + return np.squeeze(self.means[i, ..., None] + choleskys[i] @ x[..., None]) + + def marginalise(self, indices): + """Marginalise over indices. + + Parameters + ---------- + indices : array_like + Indices to marginalise. + + Returns + ------- + marginalised distribution: mixture_multivariate_normal + """ + i = self._bar(indices) + means = self.means[:, i] + covs = self.covs[:, i][:, :, i] + logA = self.logA + return mixture_multivariate_normal(means, covs, logA) + + def condition(self, indices, values): + """Condition on indices with values. + + Parameters + ---------- + indices : array_like + Indices to condition over. + values : array_like + Values to condition on. 
+ + Returns + ------- + conditional distribution: mixture_multivariate_normal + """ + i = self._bar(indices) + k = indices + marginal = self.marginalise(i) + + means = self.means[:, i] + np.einsum( + "ija,iab,ib->ij", + self.covs[:, i][:, :, k], + inv(self.covs[:, k][:, :, k]), + (values - self.means[:, k]), + ) + covs = self.covs[:, i][:, :, i] - np.einsum( + "ija,iab,ibk->ijk", + self.covs[:, i][:, :, k], + inv(self.covs[:, k][:, :, k]), + self.covs[:, k][:, :, i], + ) + logA = ( + marginal.logpdf(values, reduce=False) + self.logA - marginal.logpdf(values) + ) + return mixture_multivariate_normal(means, covs, logA) + + def _bar(self, indices): + """Return the indices not in the given indices.""" + k = np.ones(self.means.shape[-1], dtype=bool) + k[indices] = False + return k + + def bijector(self, x, inverse=False): + """Bijector between U([0, 1])^d and the distribution. + + - x in [0, 1]^d is the hypercube space. + - theta in R^d is the physical space. + + Computes the transformation from x to theta or theta to x depending on + the value of inverse. + + Parameters + ---------- + x : array_like, shape (..., d) + if inverse: x is theta + else: x is x + inverse : bool, optional, default=False + If True: compute the inverse transformation from physical to + hypercube space. + + Returns + ------- + transformed x or theta: array_like, shape (..., d) + """ + theta = np.empty_like(x) + if inverse: + theta[:] = x + x = np.empty_like(x) + + for i in range(x.shape[-1]): + m = self.means[..., :, i] + np.einsum( + "ia,iab,...ib->...i", + self.covs[:, i, :i], + inv(self.covs[:, :i, :i]), + theta[..., None, :i] - self.means[:, :i], + ) + c = self.covs[:, i, i] - np.einsum( + "ia,iab,ib->i", + self.covs[:, i, :i], + inv(self.covs[:, :i, :i]), + self.covs[:, i, :i], + ) + dist = mixture_multivariate_normal( + self.means[:, :i], self.covs[:, :i, :i], self.logA + ) + logA = ( + self.logA + + dist.logpdf(theta[..., :i], reduce=False, keepdims=True) + - dist.logpdf(theta[..., :i], keepdims=True)[..., None] + ) + A = np.exp(logA - logsumexp(logA, axis=-1)[..., None]) + + def f(t): + return (A * 0.5 * (1 + erf((t[..., None] - m) / np.sqrt(2 * c)))).sum( + axis=-1 + ) - y + + if inverse: + y = 0 + x[..., i] = f(theta[..., i]) + else: + y = x[..., i] + a = (m - 10 * np.sqrt(c)).min(axis=-1) + b = (m + 10 * np.sqrt(c)).max(axis=-1) + theta[..., i] = bisect(f, a, b) + if inverse: + return x + else: + return theta + + def _process_quantiles(self, x, dim): + x = np.asarray(x, dtype=float) + + if x.ndim == 0: + x = x[np.newaxis, np.newaxis] + elif x.ndim == 1: + if dim == 1: + x = x[:, np.newaxis] + else: + x = x[np.newaxis, :] + + return x + + def predict(self, A, b=None): + """Predict the mean and covariance of a linear transformation. + + if: x ~ mixN(mu, Sigma, logA) + then: Ax + b ~ mixN(A mu + b, A Sigma A^T, logA) + + Parameters + ---------- + A : array_like, shape (k, q, n) + Linear transformation matrix. + b : array_like, shape (k, q,), optional + Linear transformation vector. 
+ + Returns + ------- + predicted distribution: mixture_multivariate_normal + """ + if b is None: + b = np.zeros(A.shape[:-1]) + means = np.einsum("kqn,kn->kq", A, self.means) + b + covs = np.einsum("kqn,knm,kpm->kqp", A, self.covs, A) + logA = self.logA + return mixture_multivariate_normal(means, covs, logA) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py new file mode 100644 index 0000000..d50f26a --- /dev/null +++ b/tests/test_stats_1.py @@ -0,0 +1,36 @@ +import numpy as np +import pytest +import scipy.special +from numpy.testing import assert_allclose +from scipy.stats import kstest + +from lsbi.stats_1 import multivariate_normal + + +@pytest.mark.parametrize("shape", [(2, 3, 4), (2, 3), (2,), ()]) +@pytest.mark.parametrize("dim", [1, 5]) +class TestMultivariateNormal(object): + cls = multivariate_normal + + def random(self, dim, shape): + mean = np.random.randn(*shape, dim) + cov = np.random.randn(*shape, dim, dim) + cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) + return self.cls(mean, cov) + + def test_properties(self, dim, shape): + dist = self.random(dim, shape) + assert dist.dim == dim + assert dist.shape == shape + + @pytest.mark.parametrize("xshape", [(8, 7, 6), (8, 7), (8,), ()]) + def test_logpdf(self, dim, shape, xshape): + dist = self.random(dim, shape) + x = np.random.randn(*xshape, dim) + assert dist.logpdf(x).shape == xshape + shape + + @pytest.mark.parametrize("size", [(8, 7, 6), (8, 7), (8,), 8]) + def test_rvs(self, dim, shape, size): + dist = self.random(dim, shape) + x = dist.rvs(size) + assert x.shape == tuple(np.atleast_1d(size)) + shape + (dim,) From eda44826b0de90ea961ce03d7de8a1ec0796f79d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 7 Dec 2023 17:05:22 +0000 Subject: [PATCH 003/117] Now broadcasting for cov and mean --- lsbi/stats_1.py | 22 ++++++++++++++++------ tests/test_stats_1.py | 39 ++++++++++++++++++++++----------------- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index c7a6e8c..b8e7fdb 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -33,10 +33,10 @@ class multivariate_normal(object): """ - def __init__(self, mean, cov): + def __init__(self, mean, cov, shape=()): self.mean = mean self.cov = cov - self.shape = () + self.shape = shape assert self.cov.shape[-2:] == (self.dim, self.dim) @property @@ -55,10 +55,20 @@ def dim(self): """Dimension of the distribution.""" return self.mean.shape[-1] + def _flatten_1d(self, x): + """Flatten the distribution parameters.""" + return np.broadcast_to(x, (*self.shape, self.dim)).reshape(-1, self.dim) + + def _flatten_2d(self, x): + """Flatten the distribution parameters.""" + return np.broadcast_to(x, (*self.shape, self.dim, self.dim)).reshape( + -1, self.dim, self.dim + ) + def logpdf(self, x): """Log of the probability density function.""" - dx = x.reshape(-1, 1, self.dim) - self.mean.reshape(-1, self.dim) - invcov = np.linalg.inv(self.cov).reshape(-1, self.dim, self.dim) + dx = x.reshape(-1, 1, self.dim) - self._flatten_1d(self.mean) + invcov = self._flatten_2d(np.linalg.inv(self.cov)) chi2 = np.einsum("xaj,ajk,xak->xa", dx, invcov, dx) norm = -logdet(2 * np.pi * self.cov) / 2 logpdf = norm - chi2.reshape((*x.shape[:-1], *self.shape)) / 2 @@ -68,8 +78,8 @@ def rvs(self, size=1): """Random variates.""" size = np.atleast_1d(np.array(size, dtype=int)) x = np.random.randn(np.prod(size), np.prod(self.shape, dtype=int), self.dim) - cholesky = np.linalg.cholesky(self.cov).reshape(-1, self.dim, self.dim) - t = 
self.mean.reshape(-1, self.dim) + np.einsum("ajk,xak->xaj", cholesky, x) + cholesky = self._flatten_2d(np.linalg.cholesky(self.cov)) + t = self._flatten_1d(self.mean) + np.einsum("ajk,xak->xaj", cholesky, x) return t.reshape(*size, *self.shape, self.dim) def marginalise(self, indices): diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index d50f26a..6182b86 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -6,31 +6,36 @@ from lsbi.stats_1 import multivariate_normal +shapes = [(2, 3, 4), (3, 4), (4,), ()] -@pytest.mark.parametrize("shape", [(2, 3, 4), (2, 3), (2,), ()]) + +@pytest.mark.parametrize("mean_shape", shapes) +@pytest.mark.parametrize("cov_shape", shapes) +@pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("dim", [1, 5]) class TestMultivariateNormal(object): cls = multivariate_normal - def random(self, dim, shape): - mean = np.random.randn(*shape, dim) - cov = np.random.randn(*shape, dim, dim) + def random(self, dim, shape, mean_shape, cov_shape): + mean = np.random.randn(*mean_shape, dim) + cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - return self.cls(mean, cov) + return self.cls(mean, cov, shape) - def test_properties(self, dim, shape): - dist = self.random(dim, shape) + def test_properties(self, dim, shape, mean_shape, cov_shape): + dist = self.random(dim, shape, mean_shape, cov_shape) assert dist.dim == dim - assert dist.shape == shape + assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) - @pytest.mark.parametrize("xshape", [(8, 7, 6), (8, 7), (8,), ()]) - def test_logpdf(self, dim, shape, xshape): - dist = self.random(dim, shape) - x = np.random.randn(*xshape, dim) - assert dist.logpdf(x).shape == xshape + shape + @pytest.mark.parametrize("x_shape", [(8, 7, 6), (8, 7), (8,), ()]) + def test_logpdf(self, dim, shape, mean_shape, cov_shape, x_shape): + dist = self.random(dim, shape, mean_shape, cov_shape) + x = np.random.randn(*x_shape, dim) + logpdf = dist.logpdf(x) + assert logpdf.shape == x_shape + dist.shape - @pytest.mark.parametrize("size", [(8, 7, 6), (8, 7), (8,), 8]) - def test_rvs(self, dim, shape, size): - dist = self.random(dim, shape) + @pytest.mark.parametrize("size", [(8, 7, 6), (8, 7), (8,)]) + def test_rvs(self, dim, shape, mean_shape, cov_shape, size): + dist = self.random(dim, shape, mean_shape, cov_shape) x = dist.rvs(size) - assert x.shape == tuple(np.atleast_1d(size)) + shape + (dim,) + assert x.shape == tuple(np.atleast_1d(size)) + dist.shape + (dim,) From c204e64de5c86aa1bc555e276ea8d2a528b4d834 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 7 Dec 2023 23:05:04 +0000 Subject: [PATCH 004/117] predict, condition and marginalise now working --- lsbi/stats_1.py | 74 ++++++++++++++++++++++++------------------- tests/test_stats_1.py | 57 +++++++++++++++++++++++++++++---- 2 files changed, 92 insertions(+), 39 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index b8e7fdb..ead7bc9 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -48,27 +48,23 @@ def shape(self): @shape.setter def shape(self, shape): - self._shape = shape + self._shape = np.broadcast_shapes( + self.mean.shape[:-1], self.cov.shape[:-2], shape + ) @property def dim(self): """Dimension of the distribution.""" return self.mean.shape[-1] - def _flatten_1d(self, x): + def _flatten(self, x, *args): """Flatten the distribution parameters.""" - return np.broadcast_to(x, (*self.shape, self.dim)).reshape(-1, self.dim) - - def _flatten_2d(self, x): - 
"""Flatten the distribution parameters.""" - return np.broadcast_to(x, (*self.shape, self.dim, self.dim)).reshape( - -1, self.dim, self.dim - ) + return np.broadcast_to(x, (*self.shape, *args)).reshape(-1, *args) def logpdf(self, x): """Log of the probability density function.""" - dx = x.reshape(-1, 1, self.dim) - self._flatten_1d(self.mean) - invcov = self._flatten_2d(np.linalg.inv(self.cov)) + dx = x.reshape(-1, 1, self.dim) - self._flatten(self.mean, self.dim) + invcov = self._flatten(np.linalg.inv(self.cov), self.dim, self.dim) chi2 = np.einsum("xaj,ajk,xak->xa", dx, invcov, dx) norm = -logdet(2 * np.pi * self.cov) / 2 logpdf = norm - chi2.reshape((*x.shape[:-1], *self.shape)) / 2 @@ -78,8 +74,8 @@ def rvs(self, size=1): """Random variates.""" size = np.atleast_1d(np.array(size, dtype=int)) x = np.random.randn(np.prod(size), np.prod(self.shape, dtype=int), self.dim) - cholesky = self._flatten_2d(np.linalg.cholesky(self.cov)) - t = self._flatten_1d(self.mean) + np.einsum("ajk,xak->xaj", cholesky, x) + cholesky = self._flatten(np.linalg.cholesky(self.cov), self.dim, self.dim) + t = self._flatten(self.mean, self.dim) + np.einsum("ajk,xak->xaj", cholesky, x) return t.reshape(*size, *self.shape, self.dim) def marginalise(self, indices): @@ -95,9 +91,9 @@ def marginalise(self, indices): marginalised distribution: multimultivariate_normal """ i = self._bar(indices) - means = self.means[:, i] - covs = self.covs[:, i][:, :, i] - return multimultivariate_normal(means, covs) + mean = self.mean[..., i] + cov = self.cov[..., i, :][..., i] + return multivariate_normal(mean, cov, self.shape) def condition(self, indices, values): """Condition on indices with values. @@ -115,24 +111,30 @@ def condition(self, indices, values): """ i = self._bar(indices) k = indices - values = values.reshape(self.means[:, k].shape) - means = self.means[:, i] + np.einsum( + kdim = len(k) + idim = self.dim - kdim + old_shape = self.shape + self.shape = np.broadcast_shapes(values.shape[:-1], self.shape) + mean = self._flatten(self.mean[..., i], idim) + np.einsum( "ija,iab,ib->ij", - self.covs[:, i][:, :, k], - inv(self.covs[:, k][:, :, k]), - (values - self.means[:, k]), + self._flatten(self.cov[..., i, :][..., :, k], idim, kdim), + self._flatten(inv(self.cov[..., k, :][..., :, k]), kdim, kdim), + self._flatten(values, kdim) - self._flatten(self.mean[..., k], kdim), ) - covs = self.covs[:, i][:, :, i] - np.einsum( + cov = self._flatten(self.cov[..., i, :][..., :, i], idim, idim) - np.einsum( "ija,iab,ibk->ijk", - self.covs[:, i][:, :, k], - inv(self.covs[:, k][:, :, k]), - self.covs[:, k][:, :, i], + self._flatten(self.cov[..., i, :][..., :, k], idim, kdim), + self._flatten(inv(self.cov[..., k, :][..., :, k]), kdim, kdim), + self._flatten(self.cov[..., k, :][..., :, i], kdim, idim), ) - return multimultivariate_normal(means, covs) + mean = mean.reshape(*self.shape, idim) + cov = cov.reshape(*self.shape, idim, idim) + self.shape = old_shape + return multivariate_normal(mean, cov, self.shape) def _bar(self, indices): """Return the indices not in the given indices.""" - k = np.ones(self.means.shape[-1], dtype=bool) + k = np.ones(self.dim, dtype=bool) k[indices] = False return k @@ -175,20 +177,28 @@ def predict(self, A, b=None): Parameters ---------- - A : array_like, shape (k, q, n) + A : array_like, shape (..., k, n) Linear transformation matrix. - b : array_like, shape (k, q), optional + b : array_like, shape (..., k), optional Linear transformation vector. 
Returns ------- predicted distribution: mixture_multivariate_normal + shape (..., k) """ + k = A.shape[-2] if b is None: b = np.zeros(A.shape[:-1]) - means = np.einsum("kqn,kn->kq", A, self.means) + b - covs = np.einsum("kpn,knm,kqm->kpq", A, self.covs, A) - return multimultivariate_normal(means, covs) + A = self._flatten(A, k, self.dim) + b = self._flatten(b, k) + mean = np.einsum("kqn,kn->kq", A, self._flatten(self.mean, self.dim)) + b + cov = np.einsum( + "kqn,knm,kpm->kqp", A, self._flatten(self.cov, self.dim, self.dim), A + ) + mean = mean.reshape(*self.shape, k) + cov = cov.reshape(*self.shape, k, k) + return multivariate_normal(mean, cov) class mixture_multivariate_normal(object): diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 6182b86..9ee482d 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -7,12 +7,14 @@ from lsbi.stats_1 import multivariate_normal shapes = [(2, 3, 4), (3, 4), (4,), ()] +sizes = [(8, 7, 6), (7, 6), (6,), ()] +dims = [1, 2, 5] @pytest.mark.parametrize("mean_shape", shapes) @pytest.mark.parametrize("cov_shape", shapes) @pytest.mark.parametrize("shape", shapes) -@pytest.mark.parametrize("dim", [1, 5]) +@pytest.mark.parametrize("dim", dims) class TestMultivariateNormal(object): cls = multivariate_normal @@ -27,15 +29,56 @@ def test_properties(self, dim, shape, mean_shape, cov_shape): assert dist.dim == dim assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) - @pytest.mark.parametrize("x_shape", [(8, 7, 6), (8, 7), (8,), ()]) - def test_logpdf(self, dim, shape, mean_shape, cov_shape, x_shape): + @pytest.mark.parametrize("size", sizes) + def test_logpdf(self, dim, shape, mean_shape, cov_shape, size): dist = self.random(dim, shape, mean_shape, cov_shape) - x = np.random.randn(*x_shape, dim) + x = np.random.randn(*size, dim) logpdf = dist.logpdf(x) - assert logpdf.shape == x_shape + dist.shape + assert logpdf.shape == size + dist.shape - @pytest.mark.parametrize("size", [(8, 7, 6), (8, 7), (8,)]) + @pytest.mark.parametrize("size", sizes) def test_rvs(self, dim, shape, mean_shape, cov_shape, size): dist = self.random(dim, shape, mean_shape, cov_shape) x = dist.rvs(size) - assert x.shape == tuple(np.atleast_1d(size)) + dist.shape + (dim,) + assert x.shape == size + dist.shape + (dim,) + + @pytest.mark.parametrize("k", dims) + def test_predict(self, dim, shape, mean_shape, cov_shape, k): + dist = self.random(dim, shape, mean_shape, cov_shape) + A = np.random.randn(*dist.shape, k, dim) + b = np.random.randn(*dist.shape, k) + + d = dist.predict(A, b) + assert d.shape == dist.shape + assert d.dim == k + + d = dist.predict(A) + assert d.shape == dist.shape + assert d.dim == k + + @pytest.mark.parametrize("p", dims) + def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): + if dim <= p: + pytest.skip("dim <= p") + i = np.random.choice(dim, p, replace=False) + dist = self.random(dim, shape, mean_shape, cov_shape) + dist_2 = dist.marginalise(i) + + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape + assert dist_2.dim == dim - p + + @pytest.mark.parametrize("values_shape", shapes) + @pytest.mark.parametrize("p", dims) + def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): + if dim <= p: + pytest.skip("dim <= p") + indices = np.random.choice(dim, p, replace=False) + values = np.random.randn(*values_shape, p) + dist = self.random(dim, shape, mean_shape, cov_shape) + dist_2 = dist.condition(indices, values) + + assert isinstance(dist_2, self.cls) + assert dist.shape == 
np.broadcast_shapes(shape, mean_shape, cov_shape) + assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) + assert dist_2.dim == dim - p From 280c79c1bad410d790a5828aba6e12d2bfd800ba Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 7 Dec 2023 23:42:17 +0000 Subject: [PATCH 005/117] bijector now tested --- lsbi/stats_1.py | 27 +++++++++++++++++++++------ tests/test_stats_1.py | 14 ++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index ead7bc9..a5573d7 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -74,8 +74,8 @@ def rvs(self, size=1): """Random variates.""" size = np.atleast_1d(np.array(size, dtype=int)) x = np.random.randn(np.prod(size), np.prod(self.shape, dtype=int), self.dim) - cholesky = self._flatten(np.linalg.cholesky(self.cov), self.dim, self.dim) - t = self._flatten(self.mean, self.dim) + np.einsum("ajk,xak->xaj", cholesky, x) + L = self._flatten(np.linalg.cholesky(self.cov), self.dim, self.dim) + t = self._flatten(self.mean, self.dim) + np.einsum("ajk,xak->xaj", L, x) return t.reshape(*size, *self.shape, self.dim) def marginalise(self, indices): @@ -160,14 +160,29 @@ def bijector(self, x, inverse=False): ------- transformed x or theta: array_like, shape (..., d) """ - Ls = np.linalg.cholesky(self.covs) + L = np.linalg.cholesky(self.cov) + old_shape = self.shape + self.shape = np.broadcast_shapes(x.shape[:-1], self.shape) if inverse: - Linvs = inv(Ls) - y = np.einsum("ijk,...ik->...ij", Linvs, x - self.means) + invL = inv(L) + y = np.einsum( + "ajk,ak->aj", + self._flatten(invL, self.dim, self.dim), + self._flatten(x, self.dim) - self._flatten(self.mean, self.dim), + ) + y = y.reshape(*self.shape, self.dim) + self.shape = old_shape return scipy.stats.norm.cdf(y) else: y = scipy.stats.norm.ppf(x) - return self.means + np.einsum("ijk,...ik->...ij", Ls, y) + z = self._flatten(self.mean, self.dim) + np.einsum( + "ajk,ak->aj", + self._flatten(L, self.dim, self.dim), + self._flatten(y, self.dim), + ) + z = z.reshape(*self.shape, self.dim) + self.shape = old_shape + return z def predict(self, A, b=None): """Predict the mean and covariance of a linear transformation. 
diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 9ee482d..da31f12 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -82,3 +82,17 @@ def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) assert dist_2.dim == dim - p + + @pytest.mark.parametrize("x_shape", shapes) + def test_bijector(self, dim, shape, mean_shape, cov_shape, x_shape): + dist = self.random(dim, shape, mean_shape, cov_shape) + x = np.random.rand(*x_shape, dim) + y = dist.bijector(x) + assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) + assert y.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) + + y = np.random.rand(*x_shape, dim) + x = dist.bijector(y, inverse=True) + + assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) + assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) From d57bbfb31cf5cef7052326090d122d8b78d52a4f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 8 Dec 2023 16:41:29 +0000 Subject: [PATCH 006/117] Halfway through mixture upgrade --- lsbi/stats_1.py | 61 +++++++++++++++++++++++-------------------- tests/test_stats_1.py | 61 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 82 insertions(+), 40 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index a5573d7..8927634 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -34,8 +34,8 @@ class multivariate_normal(object): """ def __init__(self, mean, cov, shape=()): - self.mean = mean - self.cov = cov + self.mean = np.atleast_1d(mean) + self.cov = np.atleast_2d(cov) self.shape = shape assert self.cov.shape[-2:] == (self.dim, self.dim) @@ -48,9 +48,8 @@ def shape(self): @shape.setter def shape(self, shape): - self._shape = np.broadcast_shapes( - self.mean.shape[:-1], self.cov.shape[:-2], shape - ) + self._shape = shape + self._shape = self.shape @property def dim(self): @@ -203,8 +202,10 @@ def predict(self, A, b=None): shape (..., k) """ k = A.shape[-2] + old_shape = self.shape if b is None: b = np.zeros(A.shape[:-1]) + self.shape = np.broadcast_shapes(self.shape, A.shape[:-2], b.shape[:-1]) A = self._flatten(A, k, self.dim) b = self._flatten(b, k) mean = np.einsum("kqn,kn->kq", A, self._flatten(self.mean, self.dim)) + b @@ -213,45 +214,49 @@ def predict(self, A, b=None): ) mean = mean.reshape(*self.shape, k) cov = cov.reshape(*self.shape, k, k) - return multivariate_normal(mean, cov) + ans = multivariate_normal(mean, cov, self.shape) + self.shape = old_shape + return ans -class mixture_multivariate_normal(object): +class mixture_normal(multivariate_normal): """Mixture of multivariate normal distributions. - Implemented with the same style as scipy.stats.multivariate_normal + Broadcastable multivariate mixture model. Parameters ---------- - means : array_like, shape (n_components, n_features) + mean : array_like, shape (..., n, dim) Mean of each component. - covs: array_like, shape (n_components, n_features, n_features) + cov: array_like, shape (..., n, dim, dim) Covariance matrix of each component. - logA: array_like, shape (n_components,) + logA: array_like, shape (..., n,) Log of the mixing weights. 
""" - def __init__(self, means, covs, logA): - self.means = np.array([np.atleast_1d(m) for m in means]) - self.covs = np.array([np.atleast_2d(c) for c in covs]) - self.logA = np.atleast_1d(logA) + def __init__(self, logA, mean, cov, shape=()): + self.logA = np.array(logA) + super().__init__(mean, cov, shape) + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + self.logA.shape, self.mean.shape[:-1], self.cov.shape[:-2], self._shape + ) + + @shape.setter + def shape(self, shape): + self._shape = shape + self._shape = self.shape - def logpdf(self, x, reduce=True, keepdims=False): + def logpdf(self, x): """Log of the probability density function.""" - x = self._process_quantiles(x, self.means.shape[-1]) - dx = self.means - x[..., None, :] - invcovs = np.linalg.inv(self.covs) - chi2 = np.einsum("...ij,ijk,...ik->...i", dx, invcovs, dx) - norm = -logdet(2 * np.pi * self.covs) / 2 - logpdf = norm - chi2 / 2 - if reduce: - logA = self.logA - scipy.special.logsumexp(self.logA) - logpdf = np.squeeze(scipy.special.logsumexp(logpdf + logA, axis=-1)) - if not keepdims: - logpdf = np.squeeze(logpdf) - return logpdf + logpdf = super().logpdf(x) + logA = self.logA - scipy.special.logsumexp(self.logA) + return scipy.special.logsumexp(logpdf + logA, axis=-1) def rvs(self, size=1): """Random variates.""" diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index da31f12..3ed89e2 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -4,17 +4,17 @@ from numpy.testing import assert_allclose from scipy.stats import kstest -from lsbi.stats_1 import multivariate_normal +from lsbi.stats_1 import mixture_normal, multivariate_normal shapes = [(2, 3, 4), (3, 4), (4,), ()] sizes = [(8, 7, 6), (7, 6), (6,), ()] dims = [1, 2, 5] +@pytest.mark.parametrize("dim", dims) +@pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("mean_shape", shapes) @pytest.mark.parametrize("cov_shape", shapes) -@pytest.mark.parametrize("shape", shapes) -@pytest.mark.parametrize("dim", dims) class TestMultivariateNormal(object): cls = multivariate_normal @@ -42,19 +42,26 @@ def test_rvs(self, dim, shape, mean_shape, cov_shape, size): x = dist.rvs(size) assert x.shape == size + dist.shape + (dim,) + @pytest.mark.parametrize("A_shape", shapes) + @pytest.mark.parametrize("b_shape", shapes) @pytest.mark.parametrize("k", dims) - def test_predict(self, dim, shape, mean_shape, cov_shape, k): + def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): dist = self.random(dim, shape, mean_shape, cov_shape) - A = np.random.randn(*dist.shape, k, dim) - b = np.random.randn(*dist.shape, k) + A = np.random.randn(*A_shape, k, dim) + b = np.random.randn(*b_shape, k) - d = dist.predict(A, b) - assert d.shape == dist.shape - assert d.dim == k + dist_2 = dist.predict(A, b) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes( + dist.shape, A.shape[:-2], b.shape[:-1] + ) + # assert dist_2.cov.shape[:-2] == np.broadcast_shapes(cov.shape[:-2], A.shape[:-2], b.shape[:-1], (k, k)) + assert dist_2.dim == k - d = dist.predict(A) - assert d.shape == dist.shape - assert d.dim == k + dist_2 = dist.predict(A) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2]) + assert dist_2.dim == k @pytest.mark.parametrize("p", dims) def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): @@ -96,3 +103,33 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, x_shape): assert dist.shape 
== np.broadcast_shapes(shape, mean_shape, cov_shape) assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) + + +@pytest.mark.parametrize("shape", shapes) +@pytest.mark.parametrize("logA_shape", shapes) +@pytest.mark.parametrize("mean_shape", shapes) +@pytest.mark.parametrize("cov_shape", shapes) +@pytest.mark.parametrize("dim", dims) +class TestMixtureNormal(object): + cls = mixture_normal + + def random(self, dim, shape, logA_shape, mean_shape, cov_shape): + logA = np.random.randn(*logA_shape) + mean = np.random.randn(*mean_shape, dim) + cov = np.random.randn(*cov_shape, dim, dim) + cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) + return self.cls(logA, mean, cov, shape) + + def test_properties(self, dim, shape, logA_shape, mean_shape, cov_shape): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + assert dist.dim == dim + assert dist.shape == np.broadcast_shapes( + shape, mean_shape, cov_shape, logA_shape + ) + + @pytest.mark.parametrize("size", sizes) + def test_logpdf(self, dim, shape, logA_shape, mean_shape, cov_shape, size): + dist = self.random(dim, logA_shape, shape, mean_shape, cov_shape) + x = np.random.randn(*size, dim) + logpdf = dist.logpdf(x) + assert logpdf.shape == size + dist.shape[:-1] From 5cf4a5b6ea66c322a45b52b1ad84a38fbc063d1d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 8 Dec 2023 16:43:47 +0000 Subject: [PATCH 007/117] Extended predict tests --- lsbi/stats_1.py | 15 +++++++++------ tests/test_stats_1.py | 29 ++++++++++++++++++----------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index a5573d7..9b00c8d 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -34,8 +34,8 @@ class multivariate_normal(object): """ def __init__(self, mean, cov, shape=()): - self.mean = mean - self.cov = cov + self.mean = np.atleast_1d(mean) + self.cov = np.atleast_2d(cov) self.shape = shape assert self.cov.shape[-2:] == (self.dim, self.dim) @@ -48,9 +48,8 @@ def shape(self): @shape.setter def shape(self, shape): - self._shape = np.broadcast_shapes( - self.mean.shape[:-1], self.cov.shape[:-2], shape - ) + self._shape = shape + self._shape = self.shape @property def dim(self): @@ -203,8 +202,10 @@ def predict(self, A, b=None): shape (..., k) """ k = A.shape[-2] + old_shape = self.shape if b is None: b = np.zeros(A.shape[:-1]) + self.shape = np.broadcast_shapes(self.shape, A.shape[:-2], b.shape[:-1]) A = self._flatten(A, k, self.dim) b = self._flatten(b, k) mean = np.einsum("kqn,kn->kq", A, self._flatten(self.mean, self.dim)) + b @@ -213,7 +214,9 @@ def predict(self, A, b=None): ) mean = mean.reshape(*self.shape, k) cov = cov.reshape(*self.shape, k, k) - return multivariate_normal(mean, cov) + ans = multivariate_normal(mean, cov, self.shape) + self.shape = old_shape + return ans class mixture_multivariate_normal(object): diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index da31f12..71c0a5a 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -11,10 +11,10 @@ dims = [1, 2, 5] +@pytest.mark.parametrize("dim", dims) +@pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("mean_shape", shapes) @pytest.mark.parametrize("cov_shape", shapes) -@pytest.mark.parametrize("shape", shapes) -@pytest.mark.parametrize("dim", dims) class TestMultivariateNormal(object): cls = multivariate_normal @@ -42,19 +42,26 @@ def test_rvs(self, dim, shape, mean_shape, cov_shape, size): x = dist.rvs(size) assert x.shape == size + dist.shape + (dim,) + 
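# --- Editor's aside, not part of the original patch: a minimal sketch of the
# --- broadcasting behaviour these tests exercise, assuming the
# --- multivariate_normal class from lsbi/stats_1.py as written above.
#
#     import numpy as np
#     from lsbi.stats_1 import multivariate_normal
#
#     dist = multivariate_normal(mean=np.zeros((3, 4, 5)),  # mean_shape (3, 4), dim 5
#                                cov=np.eye(5),              # cov_shape ()
#                                shape=(2, 3, 4))            # forced broadcast
#     dist.shape                      # (2, 3, 4): broadcast of (3, 4), () and (2, 3, 4)
#     dist.dim                        # 5
#     dist.logpdf(np.zeros(5)).shape  # (2, 3, 4)
#     dist.rvs((8, 7)).shape          # (8, 7, 2, 3, 4, 5)
#     dist.predict(np.ones((6, 5))).dim  # 6: a (k, dim) matrix maps dim -> k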
@pytest.mark.parametrize("A_shape", shapes) + @pytest.mark.parametrize("b_shape", shapes) @pytest.mark.parametrize("k", dims) - def test_predict(self, dim, shape, mean_shape, cov_shape, k): + def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): dist = self.random(dim, shape, mean_shape, cov_shape) - A = np.random.randn(*dist.shape, k, dim) - b = np.random.randn(*dist.shape, k) + A = np.random.randn(*A_shape, k, dim) + b = np.random.randn(*b_shape, k) - d = dist.predict(A, b) - assert d.shape == dist.shape - assert d.dim == k + dist_2 = dist.predict(A, b) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes( + dist.shape, A.shape[:-2], b.shape[:-1] + ) + # assert dist_2.cov.shape[:-2] == np.broadcast_shapes(cov.shape[:-2], A.shape[:-2], b.shape[:-1], (k, k)) + assert dist_2.dim == k - d = dist.predict(A) - assert d.shape == dist.shape - assert d.dim == k + dist_2 = dist.predict(A) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2]) + assert dist_2.dim == k @pytest.mark.parametrize("p", dims) def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): From 5d907632ee94485c49535621bfe8f7542db01484 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 8 Dec 2023 17:08:51 +0000 Subject: [PATCH 008/117] Now not unecessarily flattening covariance matrices --- lsbi/stats_1.py | 48 +++++++++++++------------------------------ tests/test_stats_1.py | 22 +++++++++++++++++++- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 9b00c8d..4e33f31 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -110,25 +110,18 @@ def condition(self, indices, values): """ i = self._bar(indices) k = indices - kdim = len(k) - idim = self.dim - kdim - old_shape = self.shape - self.shape = np.broadcast_shapes(values.shape[:-1], self.shape) - mean = self._flatten(self.mean[..., i], idim) + np.einsum( - "ija,iab,ib->ij", - self._flatten(self.cov[..., i, :][..., :, k], idim, kdim), - self._flatten(inv(self.cov[..., k, :][..., :, k]), kdim, kdim), - self._flatten(values, kdim) - self._flatten(self.mean[..., k], kdim), + mean = self.mean[..., i] + np.einsum( + "...ja,...ab,...b->...j", + self.cov[..., i, :][..., :, k], + inv(self.cov[..., k, :][..., :, k]), + values - self.mean[..., k], ) - cov = self._flatten(self.cov[..., i, :][..., :, i], idim, idim) - np.einsum( - "ija,iab,ibk->ijk", - self._flatten(self.cov[..., i, :][..., :, k], idim, kdim), - self._flatten(inv(self.cov[..., k, :][..., :, k]), kdim, kdim), - self._flatten(self.cov[..., k, :][..., :, i], kdim, idim), + cov = self.cov[..., i, :][..., :, i] - np.einsum( + "...ja,...ab,...bk->...jk", + self.cov[..., i, :][..., :, k], + inv(self.cov[..., k, :][..., :, k]), + self.cov[..., k, :][..., :, i], ) - mean = mean.reshape(*self.shape, idim) - cov = cov.reshape(*self.shape, idim, idim) - self.shape = old_shape return multivariate_normal(mean, cov, self.shape) def _bar(self, indices): @@ -183,7 +176,7 @@ def bijector(self, x, inverse=False): self.shape = old_shape return z - def predict(self, A, b=None): + def predict(self, A, b=0): """Predict the mean and covariance of a linear transformation. 
if: x ~ N(mu, Sigma) @@ -201,22 +194,9 @@ def predict(self, A, b=None): predicted distribution: mixture_multivariate_normal shape (..., k) """ - k = A.shape[-2] - old_shape = self.shape - if b is None: - b = np.zeros(A.shape[:-1]) - self.shape = np.broadcast_shapes(self.shape, A.shape[:-2], b.shape[:-1]) - A = self._flatten(A, k, self.dim) - b = self._flatten(b, k) - mean = np.einsum("kqn,kn->kq", A, self._flatten(self.mean, self.dim)) + b - cov = np.einsum( - "kqn,knm,kpm->kqp", A, self._flatten(self.cov, self.dim, self.dim), A - ) - mean = mean.reshape(*self.shape, k) - cov = cov.reshape(*self.shape, k, k) - ans = multivariate_normal(mean, cov, self.shape) - self.shape = old_shape - return ans + mean = np.einsum("...qn,...n->...q", A, self.mean) + b + cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) + return multivariate_normal(mean, cov, self.shape) class mixture_multivariate_normal(object): diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 71c0a5a..dc0a1ad 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -52,15 +52,28 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): dist_2 = dist.predict(A, b) assert isinstance(dist_2, self.cls) + assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == np.broadcast_shapes( dist.shape, A.shape[:-2], b.shape[:-1] ) - # assert dist_2.cov.shape[:-2] == np.broadcast_shapes(cov.shape[:-2], A.shape[:-2], b.shape[:-1], (k, k)) + assert dist_2.cov.shape[:-2] == np.broadcast_shapes( + dist.cov.shape[:-2], A.shape[:-2] + ) + assert dist_2.mean.shape[:-1] == np.broadcast_shapes( + dist.mean.shape[:-1], A.shape[:-2], b.shape[:-1] + ) assert dist_2.dim == k dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) + assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2]) + assert dist_2.cov.shape[:-2] == np.broadcast_shapes( + dist.cov.shape[:-2], A.shape[:-2] + ) + assert dist_2.mean.shape[:-1] == np.broadcast_shapes( + dist.mean.shape[:-1], A.shape[:-2] + ) assert dist_2.dim == k @pytest.mark.parametrize("p", dims) @@ -72,7 +85,10 @@ def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): dist_2 = dist.marginalise(i) assert isinstance(dist_2, self.cls) + assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == dist.shape + assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] + assert dist_2.mean.shape[:-1] == dist.mean.shape[:-1] assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) @@ -88,6 +104,10 @@ def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): assert isinstance(dist_2, self.cls) assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) + assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] + assert dist_2.mean.shape[:-1] == np.broadcast_shapes( + dist.mean.shape[:-1], dist.cov.shape[:-2], values_shape + ) assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) From 52155bd0e3e351990be5e9c5eae457e7d535a5d7 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 9 Dec 2023 15:27:18 +0000 Subject: [PATCH 009/117] rvs working --- lsbi/stats_1.py | 33 ++++++++++++++++++++++++--------- tests/test_stats_1.py | 6 ++++++ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index b3b8d78..6dedd4f 100644 --- a/lsbi/stats_1.py +++ 
b/lsbi/stats_1.py @@ -8,6 +8,13 @@ from lsbi.utils import bisect, logdet +def choice(size, p): + """Vectorised choice function.""" + cump = np.cumsum(p, axis=-1) + u = np.random.rand(*size, *p.shape) + return np.argmin(u > cump, axis=-1) + + class multivariate_normal(object): """Vectorised multivariate normal distribution. @@ -235,18 +242,26 @@ def shape(self, shape): def logpdf(self, x): """Log of the probability density function.""" logpdf = super().logpdf(x) - logA = self.logA - scipy.special.logsumexp(self.logA) - return scipy.special.logsumexp(logpdf + logA, axis=-1) + if self.shape == (): + return logpdf + logA = self.logA - logsumexp(self.logA, axis=-1)[..., None] + return logsumexp(logpdf + logA, axis=-1) def rvs(self, size=1): """Random variates.""" - size = np.atleast_1d(size) - p = np.exp(self.logA - self.logA.max()) - p /= p.sum() - i = np.random.choice(len(p), size, p=p) - x = np.random.randn(*size, self.means.shape[-1]) - choleskys = np.linalg.cholesky(self.covs) - return np.squeeze(self.means[i, ..., None] + choleskys[i] @ x[..., None]) + if self.shape == (): + return super().rvs(size) + size = np.atleast_1d(np.array(size, dtype=int)) + p = np.exp(self.logA - logsumexp(self.logA, axis=-1)[..., None]) + p = np.broadcast_to(p, self.shape) + i = choice(size, p) + L = np.linalg.cholesky(self.cov) + L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) + L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) + mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) + x = np.random.randn(*size, *self.shape[:-1], self.dim) + return mean + np.einsum("...ij,...j->...i", L, x) def marginalise(self, indices): """Marginalise over indices. diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 70604f6..71163e0 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -153,3 +153,9 @@ def test_logpdf(self, dim, shape, logA_shape, mean_shape, cov_shape, size): x = np.random.randn(*size, dim) logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape[:-1] + + @pytest.mark.parametrize("size", sizes) + def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, size): + dist = self.random(dim, logA_shape, shape, mean_shape, cov_shape) + x = dist.rvs(size) + assert x.shape == size + dist.shape[:-1] + (dim,) From 32d847bc7ba7aa844e785a32df0950688ca28374 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 10 Dec 2023 01:44:38 +0000 Subject: [PATCH 010/117] Neater version of logpdf and rvs without flatten --- lsbi/stats_1.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 6dedd4f..b9aae9f 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -69,20 +69,19 @@ def _flatten(self, x, *args): def logpdf(self, x): """Log of the probability density function.""" - dx = x.reshape(-1, 1, self.dim) - self._flatten(self.mean, self.dim) - invcov = self._flatten(np.linalg.inv(self.cov), self.dim, self.dim) - chi2 = np.einsum("xaj,ajk,xak->xa", dx, invcov, dx) + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) + dx = x.reshape(*x.shape[:-1], *np.ones_like(self.shape), self.dim) - mean + invcov = np.linalg.inv(self.cov) + chi2 = np.einsum("...j,...jk,...k->...", dx, invcov, dx) norm = -logdet(2 * np.pi * self.cov) / 2 - logpdf = norm - chi2.reshape((*x.shape[:-1], *self.shape)) / 2 - return logpdf + return norm - chi2 / 2 def rvs(self, size=1): """Random variates.""" - size = np.atleast_1d(np.array(size, dtype=int)) - x = 
np.random.randn(np.prod(size), np.prod(self.shape, dtype=int), self.dim) - L = self._flatten(np.linalg.cholesky(self.cov), self.dim, self.dim) - t = self._flatten(self.mean, self.dim) + np.einsum("ajk,xak->xaj", L, x) - return t.reshape(*size, *self.shape, self.dim) + size = np.atleast_1d(size) + x = np.random.randn(*size, *self.shape, self.dim) + L = np.linalg.cholesky(self.cov) + return self.mean + np.einsum("...jk,...k->...j", L, x) def marginalise(self, indices): """Marginalise over indices. From d0cdd9dfd7d0715f31c90a623fc5284cc9f1da42 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 10 Dec 2023 02:00:57 +0000 Subject: [PATCH 011/117] Removed _flatten completely --- lsbi/stats_1.py | 27 +++++---------------------- tests/test_stats_1.py | 6 ------ 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index b9aae9f..49e6e2a 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -63,10 +63,6 @@ def dim(self): """Dimension of the distribution.""" return self.mean.shape[-1] - def _flatten(self, x, *args): - """Flatten the distribution parameters.""" - return np.broadcast_to(x, (*self.shape, *args)).reshape(-1, *args) - def logpdf(self, x): """Log of the probability density function.""" mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) @@ -159,28 +155,15 @@ def bijector(self, x, inverse=False): transformed x or theta: array_like, shape (..., d) """ L = np.linalg.cholesky(self.cov) - old_shape = self.shape - self.shape = np.broadcast_shapes(x.shape[:-1], self.shape) + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) if inverse: - invL = inv(L) - y = np.einsum( - "ajk,ak->aj", - self._flatten(invL, self.dim, self.dim), - self._flatten(x, self.dim) - self._flatten(self.mean, self.dim), - ) - y = y.reshape(*self.shape, self.dim) - self.shape = old_shape + invL = np.broadcast_to(inv(L), (*self.shape, self.dim, self.dim)) + y = np.einsum("...jk,...k->...j", invL, x - mean) return scipy.stats.norm.cdf(y) else: + L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) y = scipy.stats.norm.ppf(x) - z = self._flatten(self.mean, self.dim) + np.einsum( - "ajk,ak->aj", - self._flatten(L, self.dim, self.dim), - self._flatten(y, self.dim), - ) - z = z.reshape(*self.shape, self.dim) - self.shape = old_shape - return z + return mean + np.einsum("...jk,...k->...j", L, y) def predict(self, A, b=0): """Predict the mean and covariance of a linear transformation. 
diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 71163e0..57bf823 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -52,7 +52,6 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): dist_2 = dist.predict(A, b) assert isinstance(dist_2, self.cls) - assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == np.broadcast_shapes( dist.shape, A.shape[:-2], b.shape[:-1] ) @@ -66,7 +65,6 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) - assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2]) assert dist_2.cov.shape[:-2] == np.broadcast_shapes( dist.cov.shape[:-2], A.shape[:-2] @@ -85,7 +83,6 @@ def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): dist_2 = dist.marginalise(i) assert isinstance(dist_2, self.cls) - assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == dist.shape assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] assert dist_2.mean.shape[:-1] == dist.mean.shape[:-1] @@ -102,7 +99,6 @@ def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): dist_2 = dist.condition(indices, values) assert isinstance(dist_2, self.cls) - assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] assert dist_2.mean.shape[:-1] == np.broadcast_shapes( @@ -115,13 +111,11 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, x_shape): dist = self.random(dim, shape, mean_shape, cov_shape) x = np.random.rand(*x_shape, dim) y = dist.bijector(x) - assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert y.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) y = np.random.rand(*x_shape, dim) x = dist.bijector(y, inverse=True) - assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) From 7cd4bdd22cce60c3a96afcf3faa82ac8c54aef65 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 10 Dec 2023 04:10:08 +0000 Subject: [PATCH 012/117] condition now running --- lsbi/stats_1.py | 156 ++++++++++++++++-------------------------- tests/test_stats_1.py | 47 ++++++++++++- 2 files changed, 104 insertions(+), 99 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 49e6e2a..282cbdc 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -65,6 +65,7 @@ def dim(self): def logpdf(self, x): """Log of the probability density function.""" + x = np.array(x) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) dx = x.reshape(*x.shape[:-1], *np.ones_like(self.shape), self.dim) - mean invcov = np.linalg.inv(self.cov) @@ -79,6 +80,28 @@ def rvs(self, size=1): L = np.linalg.cholesky(self.cov) return self.mean + np.einsum("...jk,...k->...j", L, x) + def predict(self, A, b=0): + """Predict the mean and covariance of a linear transformation. + + if: x ~ N(mu, Sigma) + then: Ax + b ~ N(A mu + b, A Sigma A^T) + + Parameters + ---------- + A : array_like, shape (..., k, n) + Linear transformation matrix. + b : array_like, shape (..., k), optional + Linear transformation vector. 
+ + Returns + ------- + predicted distribution: multivariate_normal + shape (..., k) + """ + mean = np.einsum("...qn,...n->...q", A, self.mean) + b + cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) + return multivariate_normal(mean, cov, self.shape) + def marginalise(self, indices): """Marginalise over indices. @@ -89,7 +112,7 @@ def marginalise(self, indices): Returns ------- - marginalised distribution: multimultivariate_normal + marginalised distribution: multivariate_normal """ i = self._bar(indices) mean = self.mean[..., i] @@ -108,7 +131,7 @@ def condition(self, indices, values): Returns ------- - conditional distribution: multimultivariate_normal + conditional distribution: multivariate_normal """ i = self._bar(indices) k = indices @@ -154,6 +177,7 @@ def bijector(self, x, inverse=False): ------- transformed x or theta: array_like, shape (..., d) """ + x = np.array(x) L = np.linalg.cholesky(self.cov) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) if inverse: @@ -165,28 +189,6 @@ def bijector(self, x, inverse=False): y = scipy.stats.norm.ppf(x) return mean + np.einsum("...jk,...k->...j", L, y) - def predict(self, A, b=0): - """Predict the mean and covariance of a linear transformation. - - if: x ~ N(mu, Sigma) - then: Ax + b ~ N(A mu + b, A Sigma A^T) - - Parameters - ---------- - A : array_like, shape (..., k, n) - Linear transformation matrix. - b : array_like, shape (..., k), optional - Linear transformation vector. - - Returns - ------- - predicted distribution: mixture_multivariate_normal - shape (..., k) - """ - mean = np.einsum("...qn,...n->...q", A, self.mean) + b - cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) - return multivariate_normal(mean, cov, self.shape) - class mixture_normal(multivariate_normal): """Mixture of multivariate normal distributions. @@ -245,6 +247,26 @@ def rvs(self, size=1): x = np.random.randn(*size, *self.shape[:-1], self.dim) return mean + np.einsum("...ij,...j->...i", L, x) + def predict(self, A, b=None): + """Predict the mean and covariance of a linear transformation. + + if: x ~ mixN(mu, Sigma, logA) + then: Ax + b ~ mixN(A mu + b, A Sigma A^T, logA) + + Parameters + ---------- + A : array_like, shape (k, q, n) + Linear transformation matrix. + b : array_like, shape (k, q,), optional + Linear transformation vector. + + Returns + ------- + predicted distribution: multivariate_normal + """ + dist = super().predict(A, b) + return multivariate_normal(self.logA, dist.mean, dist.cov, dist.shape) + def marginalise(self, indices): """Marginalise over indices. @@ -255,13 +277,10 @@ def marginalise(self, indices): Returns ------- - marginalised distribution: mixture_multivariate_normal + marginalised distribution: mixture_normal """ - i = self._bar(indices) - means = self.means[:, i] - covs = self.covs[:, i][:, :, i] - logA = self.logA - return mixture_multivariate_normal(means, covs, logA) + dist = super().marginalise(indices) + return mixture_normal(self.logA, dist.mean, dist.cov, self.shape) def condition(self, indices, values): """Condition on indices with values. 
@@ -275,34 +294,15 @@ def condition(self, indices, values): Returns ------- - conditional distribution: mixture_multivariate_normal + conditional distribution: mixture_normal """ - i = self._bar(indices) - k = indices - marginal = self.marginalise(i) - - means = self.means[:, i] + np.einsum( - "ija,iab,ib->ij", - self.covs[:, i][:, :, k], - inv(self.covs[:, k][:, :, k]), - (values - self.means[:, k]), - ) - covs = self.covs[:, i][:, :, i] - np.einsum( - "ija,iab,ibk->ijk", - self.covs[:, i][:, :, k], - inv(self.covs[:, k][:, :, k]), - self.covs[:, k][:, :, i], - ) - logA = ( - marginal.logpdf(values, reduce=False) + self.logA - marginal.logpdf(values) - ) - return mixture_multivariate_normal(means, covs, logA) - - def _bar(self, indices): - """Return the indices not in the given indices.""" - k = np.ones(self.means.shape[-1], dtype=bool) - k[indices] = False - return k + dist = super().condition(indices, values) + marginal = self.marginalise(self._bar(indices)) + marginal.mean = marginal.mean - values + logA = super(marginal.__class__, marginal).logpdf(0.0) + logA -= logsumexp(logA, axis=-1)[..., None] + logA += self.logA + return mixture_normal(logA, dist.mean, dist.cov, dist.shape) def bijector(self, x, inverse=False): """Bijector between U([0, 1])^d and the distribution. @@ -326,6 +326,7 @@ def bijector(self, x, inverse=False): ------- transformed x or theta: array_like, shape (..., d) """ + x = np.array(x) theta = np.empty_like(x) if inverse: theta[:] = x @@ -344,9 +345,7 @@ def bijector(self, x, inverse=False): inv(self.covs[:, :i, :i]), self.covs[:, i, :i], ) - dist = mixture_multivariate_normal( - self.means[:, :i], self.covs[:, :i, :i], self.logA - ) + dist = mixture_normal(self.means[:, :i], self.covs[:, :i, :i], self.logA) logA = ( self.logA + dist.logpdf(theta[..., :i], reduce=False, keepdims=True) @@ -371,40 +370,3 @@ def f(t): return x else: return theta - - def _process_quantiles(self, x, dim): - x = np.asarray(x, dtype=float) - - if x.ndim == 0: - x = x[np.newaxis, np.newaxis] - elif x.ndim == 1: - if dim == 1: - x = x[:, np.newaxis] - else: - x = x[np.newaxis, :] - - return x - - def predict(self, A, b=None): - """Predict the mean and covariance of a linear transformation. - - if: x ~ mixN(mu, Sigma, logA) - then: Ax + b ~ mixN(A mu + b, A Sigma A^T, logA) - - Parameters - ---------- - A : array_like, shape (k, q, n) - Linear transformation matrix. - b : array_like, shape (k, q,), optional - Linear transformation vector. 
- - Returns - ------- - predicted distribution: mixture_multivariate_normal - """ - if b is None: - b = np.zeros(A.shape[:-1]) - means = np.einsum("kqn,kn->kq", A, self.means) + b - covs = np.einsum("kqn,knm,kpm->kqp", A, self.covs, A) - logA = self.logA - return mixture_multivariate_normal(means, covs, logA) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 57bf823..339beaa 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -143,13 +143,56 @@ def test_properties(self, dim, shape, logA_shape, mean_shape, cov_shape): @pytest.mark.parametrize("size", sizes) def test_logpdf(self, dim, shape, logA_shape, mean_shape, cov_shape, size): - dist = self.random(dim, logA_shape, shape, mean_shape, cov_shape) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) x = np.random.randn(*size, dim) logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape[:-1] @pytest.mark.parametrize("size", sizes) def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, size): - dist = self.random(dim, logA_shape, shape, mean_shape, cov_shape) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) x = dist.rvs(size) assert x.shape == size + dist.shape[:-1] + (dim,) + + @pytest.mark.parametrize("p", dims) + def test_marginalise(self, dim, shape, logA_shape, mean_shape, cov_shape, p): + if dim <= p: + pytest.skip("dim <= p") + i = np.random.choice(dim, p, replace=False) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + dist_2 = dist.marginalise(i) + + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape + assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] + assert dist_2.mean.shape[:-1] == dist.mean.shape[:-1] + assert dist_2.logA.shape == dist.logA.shape + assert dist_2.dim == dim - p + + @pytest.mark.parametrize("values_shape", shapes) + @pytest.mark.parametrize("p", dims) + def test_condition( + self, dim, shape, logA_shape, mean_shape, cov_shape, p, values_shape + ): + dim = 2 + shape = (4,) + logA_shape = (3, 4) + mean_shape = () + cov_shape = (3, 4) + p = 1 + values_shape = (2, 3, 4) + + if dim <= p: + pytest.skip("dim <= p") + indices = np.random.choice(dim, p, replace=False) + values = np.random.randn(*values_shape, p) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + dist_2 = dist.condition(indices, values) + + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) + assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] + assert dist_2.mean.shape[:-1] == np.broadcast_shapes( + dist.mean.shape[:-1], dist.cov.shape[:-2], values_shape + ) + assert dist_2.dim == dim - p From c154533f72d49f718562e4f9bad1d1d3bb3fc413 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 10 Dec 2023 05:18:05 +0000 Subject: [PATCH 013/117] bijector now tested and working --- lsbi/stats_1.py | 40 ++++++++++++++-------------------------- tests/test_stats_1.py | 25 ++++++++++++++----------- 2 files changed, 28 insertions(+), 37 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 282cbdc..45f089d 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -3,7 +3,6 @@ import scipy.stats from numpy.linalg import inv from scipy.special import erf, logsumexp -from scipy.stats._multivariate import multivariate_normal_frozen from lsbi.utils import bisect, logdet @@ -296,10 +295,10 @@ def condition(self, indices, values): ------- conditional distribution: mixture_normal """ - dist = super().condition(indices, values) + dist = super().condition(indices, 
values[..., None, :]) marginal = self.marginalise(self._bar(indices)) - marginal.mean = marginal.mean - values - logA = super(marginal.__class__, marginal).logpdf(0.0) + marginal.mean = marginal.mean - values[..., None, :] + logA = super(marginal.__class__, marginal).logpdf(np.zeros(marginal.dim)) logA -= logsumexp(logA, axis=-1)[..., None] logA += self.logA return mixture_normal(logA, dist.mean, dist.cov, dist.shape) @@ -327,31 +326,20 @@ def bijector(self, x, inverse=False): transformed x or theta: array_like, shape (..., d) """ x = np.array(x) - theta = np.empty_like(x) + theta = np.empty(np.broadcast_shapes(x.shape, self.shape[:-1] + (self.dim,))) + if inverse: theta[:] = x - x = np.empty_like(x) - - for i in range(x.shape[-1]): - m = self.means[..., :, i] + np.einsum( - "ia,iab,...ib->...i", - self.covs[:, i, :i], - inv(self.covs[:, :i, :i]), - theta[..., None, :i] - self.means[:, :i], - ) - c = self.covs[:, i, i] - np.einsum( - "ia,iab,ib->i", - self.covs[:, i, :i], - inv(self.covs[:, :i, :i]), - self.covs[:, i, :i], - ) - dist = mixture_normal(self.means[:, :i], self.covs[:, :i, :i], self.logA) - logA = ( - self.logA - + dist.logpdf(theta[..., :i], reduce=False, keepdims=True) - - dist.logpdf(theta[..., :i], keepdims=True)[..., None] + x = np.empty(np.broadcast_shapes(x.shape, self.shape[:-1] + (self.dim,))) + + for i in range(self.dim): + dist = self.marginalise(np.s_[i + 1 :]).condition( + np.s_[:-1], theta[..., :i] ) - A = np.exp(logA - logsumexp(logA, axis=-1)[..., None]) + m = dist.mean[..., 0] + c = dist.cov[..., 0, 0] + A = np.exp(dist.logA - logsumexp(dist.logA, axis=-1)[..., None]) + m = np.broadcast_to(m, dist.shape) def f(t): return (A * 0.5 * (1 + erf((t[..., None] - m) / np.sqrt(2 * c)))).sum( diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 339beaa..61f3903 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -174,25 +174,28 @@ def test_marginalise(self, dim, shape, logA_shape, mean_shape, cov_shape, p): def test_condition( self, dim, shape, logA_shape, mean_shape, cov_shape, p, values_shape ): - dim = 2 - shape = (4,) - logA_shape = (3, 4) - mean_shape = () - cov_shape = (3, 4) - p = 1 - values_shape = (2, 3, 4) - if dim <= p: pytest.skip("dim <= p") indices = np.random.choice(dim, p, replace=False) - values = np.random.randn(*values_shape, p) + values = np.random.randn(*values_shape[:-1], p) dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) dist_2 = dist.condition(indices, values) assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) + assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] assert dist_2.mean.shape[:-1] == np.broadcast_shapes( - dist.mean.shape[:-1], dist.cov.shape[:-2], values_shape + dist.mean.shape[:-1], dist.cov.shape[:-2], values_shape[:-1] + (1,) ) assert dist_2.dim == dim - p + + @pytest.mark.parametrize("x_shape", shapes) + def test_bijector(self, dim, shape, logA_shape, mean_shape, cov_shape, x_shape): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + x = np.random.rand(*x_shape[:-1], dim) + y = dist.bijector(x) + assert y.shape == np.broadcast_shapes(x.shape, dist.shape[:-1] + (dim,)) + + y = np.random.rand(*x_shape[:-1], dim) + x = dist.bijector(y, inverse=True) + assert x.shape == np.broadcast_shapes(y.shape, dist.shape[:-1] + (dim,)) From 30f456d820345f3ee2177e670055921c32c77416 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 10 Dec 2023 
06:12:11 +0000 Subject: [PATCH 014/117] Commented and corrected the predict function for mixtures --- lsbi/stats_1.py | 135 +++++++++++++++++++++++++++--------------- lsbi/utils.py | 20 +++++++ tests/test_stats_1.py | 37 +++++++++++- 3 files changed, 142 insertions(+), 50 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 45f089d..3438e5d 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -4,27 +4,20 @@ from numpy.linalg import inv from scipy.special import erf, logsumexp -from lsbi.utils import bisect, logdet - - -def choice(size, p): - """Vectorised choice function.""" - cump = np.cumsum(p, axis=-1) - u = np.random.rand(*size, *p.shape) - return np.argmin(u > cump, axis=-1) +from lsbi.utils import bisect, choice, logdet class multivariate_normal(object): """Vectorised multivariate normal distribution. This extends scipy.stats.multivariate_normal to allow for vectorisation across - the distribution parameters. mean can be an array of shape (..., dim) and cov - can be an array of shape (..., dim, dim) where ... represent arbitrary broadcastable - shapes. + the distribution parameters. Implemented with the same style as scipy.stats.multivariate_normal, except that results are not squeezed. + mean and cov are lazily broadcasted to the same shape to improve performance. + Parameters ---------- mean : array_like, shape (..., dim) @@ -36,13 +29,12 @@ class multivariate_normal(object): shape: tuple, optional, default=() Shape of the distribution. Useful for forcing a broadcast beyond that inferred by mean and cov shapes - """ def __init__(self, mean, cov, shape=()): self.mean = np.atleast_1d(mean) self.cov = np.atleast_2d(cov) - self.shape = shape + self._shape = shape assert self.cov.shape[-2:] == (self.dim, self.dim) @property @@ -52,28 +44,47 @@ def shape(self): self.mean.shape[:-1], self.cov.shape[:-2], self._shape ) - @shape.setter - def shape(self, shape): - self._shape = shape - self._shape = self.shape - @property def dim(self): """Dimension of the distribution.""" return self.mean.shape[-1] def logpdf(self, x): - """Log of the probability density function.""" + """Log of the probability density function. + + Parameters + ---------- + x : array_like, shape (*size, dim) + Points at which to evaluate the log of the probability density + function. + + Returns + ------- + logpdf : array_like, shape (*size, *shape) + Log of the probability density function evaluated at x. + """ x = np.array(x) + size = x.shape[:-1] mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) - dx = x.reshape(*x.shape[:-1], *np.ones_like(self.shape), self.dim) - mean - invcov = np.linalg.inv(self.cov) + dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean + invcov = inv(self.cov) chi2 = np.einsum("...j,...jk,...k->...", dx, invcov, dx) norm = -logdet(2 * np.pi * self.cov) / 2 return norm - chi2 / 2 def rvs(self, size=1): - """Random variates.""" + """Draw random samples from the distribution. + + Parameters + ---------- + size : int or tuple of ints, optional, default=1 + Number of samples to draw. + + Returns + ------- + rvs : ndarray, shape (*size, *shape, dim) + Random samples from the distribution. + """ size = np.atleast_1d(size) x = np.random.randn(*size, *self.shape, self.dim) L = np.linalg.cholesky(self.cov) @@ -87,15 +98,16 @@ def predict(self, A, b=0): Parameters ---------- - A : array_like, shape (..., k, n) + A : array_like, shape (..., k, dim) Linear transformation matrix. b : array_like, shape (..., k), optional Linear transformation vector. 
+ where self.shape is broadcastable to ... + Returns ------- - predicted distribution: multivariate_normal - shape (..., k) + multivariate_normal shape (..., k) """ mean = np.einsum("...qn,...n->...q", A, self.mean) + b cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) @@ -111,7 +123,7 @@ def marginalise(self, indices): Returns ------- - marginalised distribution: multivariate_normal + multivariate_normal shape (*shape, dim - len(indices)) """ i = self._bar(indices) mean = self.mean[..., i] @@ -125,12 +137,14 @@ def condition(self, indices, values): ---------- indices : array_like Indices to condition over. - values : array_like + values : array_like shape (..., len(indices)) Values to condition on. + where where self.shape is broadcastable to ... + Returns ------- - conditional distribution: multivariate_normal + multivariate_normal shape (..., len(indices)) """ i = self._bar(indices) k = indices @@ -165,16 +179,18 @@ def bijector(self, x, inverse=False): Parameters ---------- - x : array_like, shape (..., d) + x : array_like, shape (..., dim) if inverse: x is theta else: x is x inverse : bool, optional, default=False If True: compute the inverse transformation from physical to hypercube space. + where self.shape is broadcastable to ... + Returns ------- - transformed x or theta: array_like, shape (..., d) + transformed x or theta: array_like, shape (..., dim) """ x = np.array(x) L = np.linalg.cholesky(self.cov) @@ -202,7 +218,7 @@ class mixture_normal(multivariate_normal): cov: array_like, shape (..., n, dim, dim) Covariance matrix of each component. - logA: array_like, shape (..., n,) + logA: array_like, shape (..., n) Log of the mixing weights. """ @@ -217,13 +233,20 @@ def shape(self): self.logA.shape, self.mean.shape[:-1], self.cov.shape[:-2], self._shape ) - @shape.setter - def shape(self, shape): - self._shape = shape - self._shape = self.shape - def logpdf(self, x): - """Log of the probability density function.""" + """Log of the probability density function. + + Parameters + ---------- + x : array_like, shape (*size, dim) + Points at which to evaluate the log of the probability density + function. + + Returns + ------- + logpdf : array_like, shape (*size, *shape[:-1]) + Log of the probability density function evaluated at x. + """ logpdf = super().logpdf(x) if self.shape == (): return logpdf @@ -231,7 +254,16 @@ def logpdf(self, x): return logsumexp(logpdf + logA, axis=-1) def rvs(self, size=1): - """Random variates.""" + """Draw random samples from the distribution. + + Parameters + ---------- + size : int or tuple of ints, optional, default=1 + + Returns + ------- + rvs : array_like, shape (*size, *shape[:-1], dim) + """ if self.shape == (): return super().rvs(size) size = np.atleast_1d(np.array(size, dtype=int)) @@ -246,7 +278,7 @@ def rvs(self, size=1): x = np.random.randn(*size, *self.shape[:-1], self.dim) return mean + np.einsum("...ij,...j->...i", L, x) - def predict(self, A, b=None): + def predict(self, A, b=0): """Predict the mean and covariance of a linear transformation. if: x ~ mixN(mu, Sigma, logA) @@ -254,17 +286,22 @@ def predict(self, A, b=None): Parameters ---------- - A : array_like, shape (k, q, n) + A : array_like, shape (..., k, dim) Linear transformation matrix. - b : array_like, shape (k, q,), optional + b : array_like, shape (..., k), optional Linear transformation vector. + where self.shape[:-1] is broadcastable to ... 
+ Returns ------- - predicted distribution: multivariate_normal + mixture_normal shape (..., k) """ - dist = super().predict(A, b) - return multivariate_normal(self.logA, dist.mean, dist.cov, dist.shape) + if b is 0: + dist = super().predict(A[..., None, :, :]) + else: + dist = super().predict(A[..., None, :, :], b[..., None, :]) + return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape) def marginalise(self, indices): """Marginalise over indices. @@ -276,7 +313,7 @@ def marginalise(self, indices): Returns ------- - marginalised distribution: mixture_normal + mixture_normal shape (*shape, dim - len(indices)) """ dist = super().marginalise(indices) return mixture_normal(self.logA, dist.mean, dist.cov, self.shape) @@ -288,12 +325,14 @@ def condition(self, indices, values): ---------- indices : array_like Indices to condition over. - values : array_like + values : array_like shape (..., len(indices)) Values to condition on. + where self.shape[:-1] is broadcastable to ... + Returns ------- - conditional distribution: mixture_normal + mixture_normal shape (*shape, len(indices)) """ dist = super().condition(indices, values[..., None, :]) marginal = self.marginalise(self._bar(indices)) @@ -321,6 +360,8 @@ def bijector(self, x, inverse=False): If True: compute the inverse transformation from physical to hypercube space. + where self.shape[:-1] is broadcastable to ... + Returns ------- transformed x or theta: array_like, shape (..., d) diff --git a/lsbi/utils.py b/lsbi/utils.py index 38ae683..09ce54c 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -55,3 +55,23 @@ def bisect(f, a, b, args=(), tol=1e-8): b = np.where(fq == 0, q, b) b = np.where(fb * fq > 0, q, b) return (a + b) / 2 + + +def choice(size, p): + """Vectorised choice function. + + Parameters + ---------- + size : int or tuple of ints + Shape of the output. + p : array_like + Probability array + + Returns + ------- + out : ndarray + Output array of shape (*size, *p.shape[:-1]). 
+ """ + cump = np.cumsum(p, axis=-1) + u = np.random.rand(*size, *p.shape) + return np.argmin(u > cump, axis=-1) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 61f3903..f644db0 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -1,8 +1,5 @@ import numpy as np import pytest -import scipy.special -from numpy.testing import assert_allclose -from scipy.stats import kstest from lsbi.stats_1 import mixture_normal, multivariate_normal @@ -154,6 +151,40 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, size): x = dist.rvs(size) assert x.shape == size + dist.shape[:-1] + (dim,) + @pytest.mark.parametrize("A_shape", shapes) + @pytest.mark.parametrize("b_shape", shapes) + @pytest.mark.parametrize("k", dims) + def test_predict( + self, dim, shape, logA_shape, mean_shape, cov_shape, k, A_shape, b_shape + ): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + A = np.random.randn(*A_shape[:-1], k, dim) + b = np.random.randn(*b_shape[:-1], k) + + dist_2 = dist.predict(A, b) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes( + dist.shape, A.shape[:-2] + (1,), b.shape[:-1] + (1,) + ) + assert dist_2.cov.shape[:-2] == np.broadcast_shapes( + dist.cov.shape[:-2], A.shape[:-2] + (1,) + ) + assert dist_2.mean.shape[:-1] == np.broadcast_shapes( + dist.mean.shape[:-1], A.shape[:-2] + (1,), b.shape[:-1] + (1,) + ) + assert dist_2.dim == k + + dist_2 = dist.predict(A) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2] + (1,)) + assert dist_2.cov.shape[:-2] == np.broadcast_shapes( + dist.cov.shape[:-2], A.shape[:-2] + (1,) + ) + assert dist_2.mean.shape[:-1] == np.broadcast_shapes( + dist.mean.shape[:-1], A.shape[:-2] + (1,) + ) + assert dist_2.dim == k + @pytest.mark.parametrize("p", dims) def test_marginalise(self, dim, shape, logA_shape, mean_shape, cov_shape, p): if dim <= p: From f5cc946b16fbc029bd104e85c20e53b446c3284e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 10 Dec 2023 16:40:09 +0000 Subject: [PATCH 015/117] Added cleverer defaults to multivariate_normal --- lsbi/stats_1.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 3438e5d..4e9ee0c 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -7,11 +7,19 @@ from lsbi.utils import bisect, choice, logdet +def _broadcast_to(x, shape): + if x.shape == shape: + return x + if x.shape[1:] == shape[1:]: + return np.broadcast_to(x, shape) + return x * np.ones(shape) * np.eye(shape[1], shape[2])[None, ...] + + class multivariate_normal(object): """Vectorised multivariate normal distribution. This extends scipy.stats.multivariate_normal to allow for vectorisation across - the distribution parameters. + the distribution parameters mean and cov. Implemented with the same style as scipy.stats.multivariate_normal, except that results are not squeezed. 
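[Editorial aside, not part of the patch: a minimal sketch of the lazy broadcasting convention described in the class docstring above, assuming only numpy. The distribution shape is the broadcast of the mean shape (without its last axis), the cov shape (without its last two axes), and any explicitly requested shape:]

import numpy as np

mean = np.zeros((3, 4, 5))                     # (..., dim) with dim = 5
cov = np.broadcast_to(np.eye(5), (4, 5, 5))    # (..., dim, dim)
shape = np.broadcast_shapes(mean.shape[:-1], cov.shape[:-2], (2, 3, 4))
print(shape)                                   # (2, 3, 4)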
@@ -35,7 +43,8 @@ def __init__(self, mean, cov, shape=()): self.mean = np.atleast_1d(mean) self.cov = np.atleast_2d(cov) self._shape = shape - assert self.cov.shape[-2:] == (self.dim, self.dim) + self.mean = self.mean + np.zeros(self.dim) + self.cov = self.cov * np.eye(self.dim) @property def shape(self): From 015f5e54250cb03ead31b95ad76dcf6c74efd100 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 10 Dec 2023 23:08:56 +0000 Subject: [PATCH 016/117] Added checks for the new default system --- lsbi/stats_1.py | 20 ++++------ tests/test_stats_1.py | 89 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 88 insertions(+), 21 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 4e9ee0c..b191979 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -7,14 +7,6 @@ from lsbi.utils import bisect, choice, logdet -def _broadcast_to(x, shape): - if x.shape == shape: - return x - if x.shape[1:] == shape[1:]: - return np.broadcast_to(x, shape) - return x * np.ones(shape) * np.eye(shape[1], shape[2])[None, ...] - - class multivariate_normal(object): """Vectorised multivariate normal distribution. @@ -39,12 +31,14 @@ class multivariate_normal(object): inferred by mean and cov shapes """ - def __init__(self, mean, cov, shape=()): + def __init__(self, mean=0, cov=1, shape=(), dim=1): self.mean = np.atleast_1d(mean) - self.cov = np.atleast_2d(cov) + self.cov = np.atleast_1d(cov) self._shape = shape - self.mean = self.mean + np.zeros(self.dim) - self.cov = self.cov * np.eye(self.dim) + dim = np.max([dim, self.mean.shape[-1], self.cov.shape[-1]]) + self.mean = self.mean + np.zeros(dim) + if self.cov.ndim < 2: + self.cov = self.cov * np.eye(dim) @property def shape(self): @@ -306,7 +300,7 @@ def predict(self, A, b=0): ------- mixture_normal shape (..., k) """ - if b is 0: + if b == 0: dist = super().predict(A[..., None, :, :]) else: dist = super().predict(A[..., None, :, :], b[..., None, :]) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index f644db0..c91a9e0 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -19,12 +19,82 @@ def random(self, dim, shape, mean_shape, cov_shape): mean = np.random.randn(*mean_shape, dim) cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - return self.cls(mean, cov, shape) - - def test_properties(self, dim, shape, mean_shape, cov_shape): - dist = self.random(dim, shape, mean_shape, cov_shape) + dist = self.cls(mean, cov, shape) assert dist.dim == dim assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) + assert (dist.mean == mean).all() + assert (dist.cov == cov).all() + return dist + + def test_defaults(self, dim, shape, mean_shape, cov_shape): + dist_0 = self.random(dim, shape, mean_shape, cov_shape) + + # Default arguments + dist = self.cls() + assert dist.shape == () + assert dist.dim == 1 + assert (dist.mean == np.zeros(1)).all() + assert (dist.cov == np.eye(1)).all() + + dist = self.cls(dim=dim) + assert dist.shape == () + assert dist.dim == dim + assert (dist.mean == np.zeros(dim)).all() + assert (dist.cov == np.eye(dim)).all() + + dist = self.cls(shape=shape) + assert dist.shape == shape + assert dist.dim == 1 + assert (dist.mean == np.zeros(1)).all() + assert (dist.cov == np.eye(1)).all() + + dist = self.cls(shape=shape, dim=dim) + assert dist.shape == shape + assert dist.dim == dim + assert (dist.mean == np.zeros(dim)).all() + assert (dist.cov == np.eye(dim)).all() + + # inference from mean or cov + dist = self.cls(mean=dist_0.mean) + 
assert dist.shape == dist_0.mean.shape[:-1] + assert dist.dim == dim + assert (dist.mean == dist_0.mean).all() + assert (dist.cov == np.eye(dim)).all() + + dist = self.cls(cov=dist_0.cov) + assert dist.shape == dist_0.cov.shape[:-2] + assert dist.dim == dim + assert (dist.mean == np.zeros(dim)).all() + assert (dist.cov == dist_0.cov).all() + + # mean broadcasting + mean = np.random.randn() + dist = self.cls(mean, shape=shape, dim=dim) + assert dist.dim == dim + assert dist.shape == shape + assert (dist.mean == np.ones(dim) * mean).all() + assert (dist.cov == np.eye(dim)).all() + + dist = self.cls(mean, dist_0.cov) + assert dist.dim == dim + assert dist.shape == dist_0.cov.shape[:-2] + assert (dist.mean == np.ones(dim) * mean).all() + assert (dist.cov == dist_0.cov).all() + + # cov broadcasting + cov = np.random.randn() ** 2 + dist = self.cls(dist_0.mean, cov) + assert dist.dim == dim + assert dist.shape == dist_0.mean.shape[:-1] + assert (dist.mean == dist_0.mean).all() + assert (dist.cov == cov * np.eye(dim)).all() + + cov = np.random.randn(dim) ** 2 + dist = self.cls(dist_0.mean, cov) + assert dist.dim == dim + assert dist.shape == dist_0.mean.shape[:-1] + assert (dist.mean == dist_0.mean).all() + assert (dist.cov == np.diag(cov)).all() @pytest.mark.parametrize("size", sizes) def test_logpdf(self, dim, shape, mean_shape, cov_shape, size): @@ -129,14 +199,17 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape): mean = np.random.randn(*mean_shape, dim) cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - return self.cls(logA, mean, cov, shape) - def test_properties(self, dim, shape, logA_shape, mean_shape, cov_shape): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + dist = self.cls(logA, mean, cov, shape) + assert dist.dim == dim assert dist.shape == np.broadcast_shapes( - shape, mean_shape, cov_shape, logA_shape + shape, logA_shape, mean_shape, cov_shape ) + assert (dist.logA == logA).all() + assert (dist.mean == mean).all() + assert (dist.cov == cov).all() + return dist @pytest.mark.parametrize("size", sizes) def test_logpdf(self, dim, shape, logA_shape, mean_shape, cov_shape, size): From c548fb6c96e3d0a2d373826b027edc6ffeac81b1 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Mon, 11 Dec 2023 18:18:24 +0000 Subject: [PATCH 017/117] Better defaults for b --- lsbi/stats_1.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index b191979..7cdd633 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -93,7 +93,7 @@ def rvs(self, size=1): L = np.linalg.cholesky(self.cov) return self.mean + np.einsum("...jk,...k->...j", L, x) - def predict(self, A, b=0): + def predict(self, A, b=np.zeros(1)): """Predict the mean and covariance of a linear transformation. if: x ~ N(mu, Sigma) @@ -281,7 +281,7 @@ def rvs(self, size=1): x = np.random.randn(*size, *self.shape[:-1], self.dim) return mean + np.einsum("...ij,...j->...i", L, x) - def predict(self, A, b=0): + def predict(self, A, b=np.zeros(1)): """Predict the mean and covariance of a linear transformation. 
if: x ~ mixN(mu, Sigma, logA) @@ -300,10 +300,9 @@ def predict(self, A, b=0): ------- mixture_normal shape (..., k) """ - if b == 0: - dist = super().predict(A[..., None, :, :]) - else: - dist = super().predict(A[..., None, :, :], b[..., None, :]) + A = np.array(A) + b = np.array(b) + dist = super().predict(A[..., None, :, :], b[..., None, :]) return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape) def marginalise(self, indices): From 6849ad33e0b0869d43d4576d8a056c16a2b4b7b5 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 12 Dec 2023 01:40:59 +0000 Subject: [PATCH 018/117] Lazy expansion of mean now working --- lsbi/stats_1.py | 151 +++++++++++++++++--------- tests/test_stats_1.py | 243 +++++++++++++++++++++++------------------- 2 files changed, 231 insertions(+), 163 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 7cdd633..30fdfd4 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -1,7 +1,7 @@ """Extensions to scipy.stats functions.""" import numpy as np import scipy.stats -from numpy.linalg import inv +from numpy.linalg import cholesky, inv from scipy.special import erf, logsumexp from lsbi.utils import bisect, choice, logdet @@ -31,26 +31,31 @@ class multivariate_normal(object): inferred by mean and cov shapes """ - def __init__(self, mean=0, cov=1, shape=(), dim=1): - self.mean = np.atleast_1d(mean) - self.cov = np.atleast_1d(cov) + def __init__(self, mean=0, cov=1, shape=(), dim=0): + self.mean = mean + self.cov = cov self._shape = shape - dim = np.max([dim, self.mean.shape[-1], self.cov.shape[-1]]) - self.mean = self.mean + np.zeros(dim) - if self.cov.ndim < 2: - self.cov = self.cov * np.eye(dim) + self._dim = dim @property def shape(self): """Shape of the distribution.""" return np.broadcast_shapes( - self.mean.shape[:-1], self.cov.shape[:-2], self._shape + np.atleast_1d(self.mean).shape[:-1], + np.atleast_2d(self.cov).shape[:-2], + self._shape, ) @property def dim(self): """Dimension of the distribution.""" - return self.mean.shape[-1] + return np.max( + [ + *np.shape(self.mean)[-1:], + *np.shape(self.cov)[-2:], + self._dim, + ] + ) def logpdf(self, x): """Log of the probability density function. @@ -70,9 +75,12 @@ def logpdf(self, x): size = x.shape[:-1] mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean - invcov = inv(self.cov) - chi2 = np.einsum("...j,...jk,...k->...", dx, invcov, dx) - norm = -logdet(2 * np.pi * self.cov) / 2 + if len(np.shape(self.cov)) > 1: + chi2 = np.einsum("...j,...jk,...k->...", dx, inv(self.cov), dx) + norm = -logdet(2 * np.pi * self.cov) / 2 + else: + chi2 = (dx**2 / self.cov).sum(axis=-1) + norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum() / 2 return norm - chi2 / 2 def rvs(self, size=1): @@ -90,10 +98,12 @@ def rvs(self, size=1): """ size = np.atleast_1d(size) x = np.random.randn(*size, *self.shape, self.dim) - L = np.linalg.cholesky(self.cov) - return self.mean + np.einsum("...jk,...k->...j", L, x) + if len(np.shape(self.cov)) > 1: + return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) + else: + return self.mean + np.sqrt(self.cov) * x - def predict(self, A, b=np.zeros(1)): + def predict(self, A, b=0): """Predict the mean and covariance of a linear transformation. 
if: x ~ N(mu, Sigma) @@ -112,9 +122,12 @@ def predict(self, A, b=np.zeros(1)): ------- multivariate_normal shape (..., k) """ - mean = np.einsum("...qn,...n->...q", A, self.mean) + b - cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) - return multivariate_normal(mean, cov, self.shape) + mean = np.einsum("...qn,...n->...q", A, np.atleast_1d(self.mean)) + b + if len(np.shape(self.cov)) > 1: + cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) + else: + cov = np.einsum("...qn,...pn->...qp", A, A * self.cov) + return multivariate_normal(mean, cov, self.shape, A.shape[-2]) def marginalise(self, indices): """Marginalise over indices. @@ -129,9 +142,19 @@ def marginalise(self, indices): multivariate_normal shape (*shape, dim - len(indices)) """ i = self._bar(indices) - mean = self.mean[..., i] - cov = self.cov[..., i, :][..., i] - return multivariate_normal(mean, cov, self.shape) + if len(np.shape(self.mean)) > 0: + mean = self.mean[..., i] + else: + mean = self.mean + + if len(np.shape(self.cov)) > 1: + cov = self.cov[..., i, :][..., i] + elif len(np.shape(self.cov)) == 1: + cov = self.cov[i] + else: + cov = self.cov + + return multivariate_normal(mean, cov, self.shape, sum(i)) def condition(self, indices, values): """Condition on indices with values. @@ -151,19 +174,34 @@ def condition(self, indices, values): """ i = self._bar(indices) k = indices - mean = self.mean[..., i] + np.einsum( - "...ja,...ab,...b->...j", - self.cov[..., i, :][..., :, k], - inv(self.cov[..., k, :][..., :, k]), - values - self.mean[..., k], - ) - cov = self.cov[..., i, :][..., :, i] - np.einsum( - "...ja,...ab,...bk->...jk", - self.cov[..., i, :][..., :, k], - inv(self.cov[..., k, :][..., :, k]), - self.cov[..., k, :][..., :, i], - ) - return multivariate_normal(mean, cov, self.shape) + + if len(np.shape(self.mean)) > 0: + mean_i = self.mean[..., i] + mean_k = self.mean[..., k] + else: + mean_i = self.mean + mean_k = self.mean + + if len(np.shape(self.cov)) > 1: + mean = mean_i + np.einsum( + "...ja,...ab,...b->...j", + self.cov[..., i, :][..., :, k], + inv(self.cov[..., k, :][..., :, k]), + values - mean_k, + ) + cov = self.cov[..., i, :][..., :, i] - np.einsum( + "...ja,...ab,...bk->...jk", + self.cov[..., i, :][..., :, k], + inv(self.cov[..., k, :][..., :, k]), + self.cov[..., k, :][..., :, i], + ) + else: + mean = mean_i + if len(np.shape(self.cov)) == 1: + cov = self.cov[i] + else: + cov = self.cov + return multivariate_normal(mean, cov, self.shape, sum(i)) def _bar(self, indices): """Return the indices not in the given indices.""" @@ -196,16 +234,20 @@ def bijector(self, x, inverse=False): transformed x or theta: array_like, shape (..., dim) """ x = np.array(x) - L = np.linalg.cholesky(self.cov) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) if inverse: - invL = np.broadcast_to(inv(L), (*self.shape, self.dim, self.dim)) - y = np.einsum("...jk,...k->...j", invL, x - mean) + if len(np.shape(self.cov)) > 1: + y = np.einsum("...jk,...k->...j", inv(cholesky(self.cov)), x - mean) + else: + y = (x - mean) / sqrt(self.cov) return scipy.stats.norm.cdf(y) else: - L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) y = scipy.stats.norm.ppf(x) - return mean + np.einsum("...jk,...k->...j", L, y) + if len(np.shape(self.cov)) > 1: + L = cholesky(self.cov) + return mean + np.einsum("...jk,...k->...j", L, y) + else: + return mean + np.sqrt(self.cov) * y class mixture_normal(multivariate_normal): @@ -225,15 +267,18 @@ class mixture_normal(multivariate_normal): Log of the mixing weights. 
""" - def __init__(self, logA, mean, cov, shape=()): - self.logA = np.array(logA) - super().__init__(mean, cov, shape) + def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0): + self.logA = logA + super().__init__(mean, cov, shape, dim) @property def shape(self): """Shape of the distribution.""" return np.broadcast_shapes( - self.logA.shape, self.mean.shape[:-1], self.cov.shape[:-2], self._shape + np.array(self.logA).shape, + np.atleast_1d(self.mean).shape[:-1], + np.atleast_2d(self.cov).shape[:-2], + self._shape, ) def logpdf(self, x): @@ -267,13 +312,14 @@ def rvs(self, size=1): ------- rvs : array_like, shape (*size, *shape[:-1], dim) """ + # TODO Fix this for self.cov and self.logA if self.shape == (): return super().rvs(size) size = np.atleast_1d(np.array(size, dtype=int)) p = np.exp(self.logA - logsumexp(self.logA, axis=-1)[..., None]) p = np.broadcast_to(p, self.shape) i = choice(size, p) - L = np.linalg.cholesky(self.cov) + L = cholesky(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) @@ -281,7 +327,7 @@ def rvs(self, size=1): x = np.random.randn(*size, *self.shape[:-1], self.dim) return mean + np.einsum("...ij,...j->...i", L, x) - def predict(self, A, b=np.zeros(1)): + def predict(self, A, b=0): """Predict the mean and covariance of a linear transformation. if: x ~ mixN(mu, Sigma, logA) @@ -300,10 +346,11 @@ def predict(self, A, b=np.zeros(1)): ------- mixture_normal shape (..., k) """ - A = np.array(A) - b = np.array(b) - dist = super().predict(A[..., None, :, :], b[..., None, :]) - return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape) + dist = super().predict( + np.expand_dims(np.atleast_2d(A), axis=-3), + np.expand_dims(np.atleast_1d(b), axis=-2), + ) + return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) def marginalise(self, indices): """Marginalise over indices. @@ -318,7 +365,7 @@ def marginalise(self, indices): mixture_normal shape (*shape, dim - len(indices)) """ dist = super().marginalise(indices) - return mixture_normal(self.logA, dist.mean, dist.cov, self.shape) + return mixture_normal(self.logA, dist.mean, dist.cov, self.shape, dist.dim) def condition(self, indices, values): """Condition on indices with values. @@ -342,7 +389,7 @@ def condition(self, indices, values): logA = super(marginal.__class__, marginal).logpdf(np.zeros(marginal.dim)) logA -= logsumexp(logA, axis=-1)[..., None] logA += self.logA - return mixture_normal(logA, dist.mean, dist.cov, dist.shape) + return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) def bijector(self, x, inverse=False): """Bijector between U([0, 1])^d and the distribution. 
diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index c91a9e0..6f06d27 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -10,91 +10,102 @@ @pytest.mark.parametrize("dim", dims) @pytest.mark.parametrize("shape", shapes) -@pytest.mark.parametrize("mean_shape", shapes) -@pytest.mark.parametrize("cov_shape", shapes) +@pytest.mark.parametrize("mean_shape", shapes + ["scalar_mean"]) +@pytest.mark.parametrize("cov_shape", shapes + ["scalar_cov", "vector_cov"]) class TestMultivariateNormal(object): cls = multivariate_normal def random(self, dim, shape, mean_shape, cov_shape): - mean = np.random.randn(*mean_shape, dim) - cov = np.random.randn(*cov_shape, dim, dim) - cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) + if mean_shape is "scalar_mean": + mean = np.random.randn() + mean_shape = shape + else: + mean = np.random.randn(*mean_shape, dim) + if cov_shape is "scalar_cov": + cov = np.random.randn() ** 2 + cov_shape = shape + elif cov_shape is "vector_cov": + cov = np.random.randn(dim) ** 2 + cov_shape = shape + else: + cov = np.random.randn(*cov_shape, dim, dim) + cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) dist = self.cls(mean, cov, shape) assert dist.dim == dim assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) - assert (dist.mean == mean).all() - assert (dist.cov == cov).all() + assert np.all(dist.mean == mean) + assert np.all(dist.cov == cov) return dist - def test_defaults(self, dim, shape, mean_shape, cov_shape): - dist_0 = self.random(dim, shape, mean_shape, cov_shape) - - # Default arguments - dist = self.cls() - assert dist.shape == () - assert dist.dim == 1 - assert (dist.mean == np.zeros(1)).all() - assert (dist.cov == np.eye(1)).all() - - dist = self.cls(dim=dim) - assert dist.shape == () - assert dist.dim == dim - assert (dist.mean == np.zeros(dim)).all() - assert (dist.cov == np.eye(dim)).all() - - dist = self.cls(shape=shape) - assert dist.shape == shape - assert dist.dim == 1 - assert (dist.mean == np.zeros(1)).all() - assert (dist.cov == np.eye(1)).all() - - dist = self.cls(shape=shape, dim=dim) - assert dist.shape == shape - assert dist.dim == dim - assert (dist.mean == np.zeros(dim)).all() - assert (dist.cov == np.eye(dim)).all() - - # inference from mean or cov - dist = self.cls(mean=dist_0.mean) - assert dist.shape == dist_0.mean.shape[:-1] - assert dist.dim == dim - assert (dist.mean == dist_0.mean).all() - assert (dist.cov == np.eye(dim)).all() - - dist = self.cls(cov=dist_0.cov) - assert dist.shape == dist_0.cov.shape[:-2] - assert dist.dim == dim - assert (dist.mean == np.zeros(dim)).all() - assert (dist.cov == dist_0.cov).all() - - # mean broadcasting - mean = np.random.randn() - dist = self.cls(mean, shape=shape, dim=dim) - assert dist.dim == dim - assert dist.shape == shape - assert (dist.mean == np.ones(dim) * mean).all() - assert (dist.cov == np.eye(dim)).all() - - dist = self.cls(mean, dist_0.cov) - assert dist.dim == dim - assert dist.shape == dist_0.cov.shape[:-2] - assert (dist.mean == np.ones(dim) * mean).all() - assert (dist.cov == dist_0.cov).all() - - # cov broadcasting - cov = np.random.randn() ** 2 - dist = self.cls(dist_0.mean, cov) - assert dist.dim == dim - assert dist.shape == dist_0.mean.shape[:-1] - assert (dist.mean == dist_0.mean).all() - assert (dist.cov == cov * np.eye(dim)).all() - - cov = np.random.randn(dim) ** 2 - dist = self.cls(dist_0.mean, cov) - assert dist.dim == dim - assert dist.shape == dist_0.mean.shape[:-1] - assert (dist.mean == 
dist_0.mean).all() - assert (dist.cov == np.diag(cov)).all() + # def test_defaults(self, dim, shape, mean_shape, cov_shape): + # dist_0 = self.random(dim, shape, mean_shape, cov_shape) + # + # # Default arguments + # dist = self.cls() + # assert dist.shape == () + # assert dist.dim == 1 + # assert (dist.mean == np.zeros(1)).all() + # assert (dist.cov == np.eye(1)).all() + # + # dist = self.cls(dim=dim) + # assert dist.shape == () + # assert dist.dim == dim + # assert (dist.mean == np.zeros(dim)).all() + # assert (dist.cov == np.eye(dim)).all() + # + # dist = self.cls(shape=shape) + # assert dist.shape == shape + # assert dist.dim == 1 + # assert (dist.mean == np.zeros(1)).all() + # assert (dist.cov == np.eye(1)).all() + # + # dist = self.cls(shape=shape, dim=dim) + # assert dist.shape == shape + # assert dist.dim == dim + # assert (dist.mean == np.zeros(dim)).all() + # assert (dist.cov == np.eye(dim)).all() + # + # # inference from mean or cov + # dist = self.cls(mean=dist_0.mean) + # assert dist.shape == dist_0.mean.shape[:-1] + # assert dist.dim == dim + # assert (dist.mean == dist_0.mean).all() + # assert (dist.cov == np.eye(dim)).all() + # + # dist = self.cls(cov=dist_0.cov) + # assert dist.shape == dist_0.cov.shape[:-2] + # assert dist.dim == dim + # assert (dist.mean == np.zeros(dim)).all() + # assert (dist.cov == dist_0.cov).all() + # + # # mean broadcasting + # mean = np.random.randn() + # dist = self.cls(mean, shape=shape, dim=dim) + # assert dist.dim == dim + # assert dist.shape == shape + # assert (dist.mean == np.ones(dim) * mean).all() + # assert (dist.cov == np.eye(dim)).all() + # + # dist = self.cls(mean, dist_0.cov) + # assert dist.dim == dim + # assert dist.shape == dist_0.cov.shape[:-2] + # assert (dist.mean == np.ones(dim) * mean).all() + # assert (dist.cov == dist_0.cov).all() + # + # # cov broadcasting + # cov = np.random.randn() ** 2 + # dist = self.cls(dist_0.mean, cov) + # assert dist.dim == dim + # assert dist.shape == dist_0.mean.shape[:-1] + # assert (dist.mean == dist_0.mean).all() + # assert (dist.cov == cov * np.eye(dim)).all() + # + # cov = np.random.randn(dim) ** 2 + # dist = self.cls(dist_0.mean, cov) + # assert dist.dim == dim + # assert dist.shape == dist_0.mean.shape[:-1] + # assert (dist.mean == dist_0.mean).all() + # assert (dist.cov == np.diag(cov)).all() @pytest.mark.parametrize("size", sizes) def test_logpdf(self, dim, shape, mean_shape, cov_shape, size): @@ -122,22 +133,22 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): assert dist_2.shape == np.broadcast_shapes( dist.shape, A.shape[:-2], b.shape[:-1] ) - assert dist_2.cov.shape[:-2] == np.broadcast_shapes( - dist.cov.shape[:-2], A.shape[:-2] + assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( + np.shape(dist.cov)[:-2], A.shape[:-2] ) - assert dist_2.mean.shape[:-1] == np.broadcast_shapes( - dist.mean.shape[:-1], A.shape[:-2], b.shape[:-1] + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], A.shape[:-2], b.shape[:-1] ) assert dist_2.dim == k dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2]) - assert dist_2.cov.shape[:-2] == np.broadcast_shapes( - dist.cov.shape[:-2], A.shape[:-2] + assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( + np.shape(dist.cov)[:-2], A.shape[:-2] ) - assert dist_2.mean.shape[:-1] == np.broadcast_shapes( - dist.mean.shape[:-1], A.shape[:-2] + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + 
np.shape(dist.mean)[:-1], A.shape[:-2] ) assert dist_2.dim == k @@ -151,8 +162,8 @@ def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): assert isinstance(dist_2, self.cls) assert dist_2.shape == dist.shape - assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] - assert dist_2.mean.shape[:-1] == dist.mean.shape[:-1] + assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] + assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) @@ -167,9 +178,9 @@ def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) - assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] - assert dist_2.mean.shape[:-1] == np.broadcast_shapes( - dist.mean.shape[:-1], dist.cov.shape[:-2], values_shape + assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], np.shape(dist.cov)[:-2], values_shape ) assert dist_2.dim == dim - p @@ -188,17 +199,27 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, x_shape): @pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("logA_shape", shapes) -@pytest.mark.parametrize("mean_shape", shapes) -@pytest.mark.parametrize("cov_shape", shapes) +@pytest.mark.parametrize("mean_shape", shapes + ["scalar_mean"]) +@pytest.mark.parametrize("cov_shape", shapes + ["scalar_cov", "vector_cov"]) @pytest.mark.parametrize("dim", dims) class TestMixtureNormal(object): cls = mixture_normal def random(self, dim, shape, logA_shape, mean_shape, cov_shape): logA = np.random.randn(*logA_shape) - mean = np.random.randn(*mean_shape, dim) - cov = np.random.randn(*cov_shape, dim, dim) - cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) + if mean_shape is None: + mean = np.random.randn() + else: + mean = np.random.randn(*mean_shape, dim) + if cov_shape is "scalar_cov": + cov = np.random.randn() ** 2 + cov_shape = shape + elif cov_shape is "vector_cov": + cov = np.random.randn(dim) ** 2 + cov_shape = shape + else: + cov = np.random.randn(*cov_shape, dim, dim) + cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) dist = self.cls(logA, mean, cov, shape) @@ -206,9 +227,9 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape): assert dist.shape == np.broadcast_shapes( shape, logA_shape, mean_shape, cov_shape ) - assert (dist.logA == logA).all() - assert (dist.mean == mean).all() - assert (dist.cov == cov).all() + assert np.all(dist.logA == logA) + assert np.all(dist.mean == mean) + assert np.all(dist.cov == cov) return dist @pytest.mark.parametrize("size", sizes) @@ -239,22 +260,22 @@ def test_predict( assert dist_2.shape == np.broadcast_shapes( dist.shape, A.shape[:-2] + (1,), b.shape[:-1] + (1,) ) - assert dist_2.cov.shape[:-2] == np.broadcast_shapes( - dist.cov.shape[:-2], A.shape[:-2] + (1,) + assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( + np.shape(dist.cov)[:-2], A.shape[:-2] + (1,) ) - assert dist_2.mean.shape[:-1] == np.broadcast_shapes( - dist.mean.shape[:-1], A.shape[:-2] + (1,), b.shape[:-1] + (1,) + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], A.shape[:-2] + (1,), b.shape[:-1] + (1,) ) assert dist_2.dim == k dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2] + (1,)) - assert dist_2.cov.shape[:-2] == 
np.broadcast_shapes( - dist.cov.shape[:-2], A.shape[:-2] + (1,) + assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( + np.shape(dist.cov)[:-2], A.shape[:-2] + (1,) ) - assert dist_2.mean.shape[:-1] == np.broadcast_shapes( - dist.mean.shape[:-1], A.shape[:-2] + (1,) + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], A.shape[:-2] + (1,) ) assert dist_2.dim == k @@ -268,9 +289,9 @@ def test_marginalise(self, dim, shape, logA_shape, mean_shape, cov_shape, p): assert isinstance(dist_2, self.cls) assert dist_2.shape == dist.shape - assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] - assert dist_2.mean.shape[:-1] == dist.mean.shape[:-1] - assert dist_2.logA.shape == dist.logA.shape + assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] + assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] + assert np.shape(dist_2.logA) == np.shape(dist.logA) assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) @@ -287,9 +308,9 @@ def test_condition( assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) - assert dist_2.cov.shape[:-2] == dist.cov.shape[:-2] - assert dist_2.mean.shape[:-1] == np.broadcast_shapes( - dist.mean.shape[:-1], dist.cov.shape[:-2], values_shape[:-1] + (1,) + assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], np.shape(dist.cov)[:-2], values_shape[:-1] + (1,) ) assert dist_2.dim == dim - p From 5636294fa7dbd59747f7109a37c3784d22360f5a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 12 Dec 2023 02:16:27 +0000 Subject: [PATCH 019/117] lazy covariance tests now completing --- lsbi/stats_1.py | 26 +++++---- tests/test_stats_1.py | 127 ++++++++++++------------------------------ 2 files changed, 51 insertions(+), 102 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 30fdfd4..595b2dd 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -195,13 +195,15 @@ def condition(self, indices, values): inv(self.cov[..., k, :][..., :, k]), self.cov[..., k, :][..., :, i], ) + return multivariate_normal(mean, cov, self.shape, sum(i)) else: mean = mean_i if len(np.shape(self.cov)) == 1: cov = self.cov[i] else: cov = self.cov - return multivariate_normal(mean, cov, self.shape, sum(i)) + shape = np.broadcast_shapes(self.shape, values.shape[:-1]) + return multivariate_normal(mean, cov, shape, sum(i)) def _bar(self, indices): """Return the indices not in the given indices.""" @@ -239,7 +241,7 @@ def bijector(self, x, inverse=False): if len(np.shape(self.cov)) > 1: y = np.einsum("...jk,...k->...j", inv(cholesky(self.cov)), x - mean) else: - y = (x - mean) / sqrt(self.cov) + y = (x - mean) / np.sqrt(self.cov) return scipy.stats.norm.cdf(y) else: y = scipy.stats.norm.ppf(x) @@ -312,20 +314,24 @@ def rvs(self, size=1): ------- rvs : array_like, shape (*size, *shape[:-1], dim) """ - # TODO Fix this for self.cov and self.logA if self.shape == (): return super().rvs(size) size = np.atleast_1d(np.array(size, dtype=int)) p = np.exp(self.logA - logsumexp(self.logA, axis=-1)[..., None]) p = np.broadcast_to(p, self.shape) i = choice(size, p) - L = cholesky(self.cov) - L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) - L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) - mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) - mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) - x = np.random.randn(*size, *self.shape[:-1], self.dim) - 
return mean + np.einsum("...ij,...j->...i", L, x) + if len(np.shape(self.cov)) > 1: + L = cholesky(self.cov) + L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) + L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) + mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) + x = np.random.randn(*size, *self.shape[:-1], self.dim) + return mean + np.einsum("...ij,...j->...i", L, x) + else: + # TODO Fix this for self.cov and self.logA + x = np.random.randn(*size, *self.shape[:-1], self.dim) + return mean + np.sqrt(self.cov) * x def predict(self, A, b=0): """Predict the mean and covariance of a linear transformation. diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 6f06d27..257c64e 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -16,97 +16,28 @@ class TestMultivariateNormal(object): cls = multivariate_normal def random(self, dim, shape, mean_shape, cov_shape): - if mean_shape is "scalar_mean": + if mean_shape == "scalar_mean": mean = np.random.randn() mean_shape = shape else: mean = np.random.randn(*mean_shape, dim) - if cov_shape is "scalar_cov": + if cov_shape == "scalar_cov": cov = np.random.randn() ** 2 cov_shape = shape - elif cov_shape is "vector_cov": + elif cov_shape == "vector_cov": cov = np.random.randn(dim) ** 2 cov_shape = shape else: cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - dist = self.cls(mean, cov, shape) + dist = self.cls(mean, cov, shape, dim) + assert dist.dim == dim assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) assert np.all(dist.mean == mean) assert np.all(dist.cov == cov) return dist - # def test_defaults(self, dim, shape, mean_shape, cov_shape): - # dist_0 = self.random(dim, shape, mean_shape, cov_shape) - # - # # Default arguments - # dist = self.cls() - # assert dist.shape == () - # assert dist.dim == 1 - # assert (dist.mean == np.zeros(1)).all() - # assert (dist.cov == np.eye(1)).all() - # - # dist = self.cls(dim=dim) - # assert dist.shape == () - # assert dist.dim == dim - # assert (dist.mean == np.zeros(dim)).all() - # assert (dist.cov == np.eye(dim)).all() - # - # dist = self.cls(shape=shape) - # assert dist.shape == shape - # assert dist.dim == 1 - # assert (dist.mean == np.zeros(1)).all() - # assert (dist.cov == np.eye(1)).all() - # - # dist = self.cls(shape=shape, dim=dim) - # assert dist.shape == shape - # assert dist.dim == dim - # assert (dist.mean == np.zeros(dim)).all() - # assert (dist.cov == np.eye(dim)).all() - # - # # inference from mean or cov - # dist = self.cls(mean=dist_0.mean) - # assert dist.shape == dist_0.mean.shape[:-1] - # assert dist.dim == dim - # assert (dist.mean == dist_0.mean).all() - # assert (dist.cov == np.eye(dim)).all() - # - # dist = self.cls(cov=dist_0.cov) - # assert dist.shape == dist_0.cov.shape[:-2] - # assert dist.dim == dim - # assert (dist.mean == np.zeros(dim)).all() - # assert (dist.cov == dist_0.cov).all() - # - # # mean broadcasting - # mean = np.random.randn() - # dist = self.cls(mean, shape=shape, dim=dim) - # assert dist.dim == dim - # assert dist.shape == shape - # assert (dist.mean == np.ones(dim) * mean).all() - # assert (dist.cov == np.eye(dim)).all() - # - # dist = self.cls(mean, dist_0.cov) - # assert dist.dim == dim - # assert dist.shape == dist_0.cov.shape[:-2] - # assert (dist.mean == np.ones(dim) * mean).all() - # assert (dist.cov == dist_0.cov).all() - # - # # cov broadcasting - # cov = np.random.randn() 
** 2 - # dist = self.cls(dist_0.mean, cov) - # assert dist.dim == dim - # assert dist.shape == dist_0.mean.shape[:-1] - # assert (dist.mean == dist_0.mean).all() - # assert (dist.cov == cov * np.eye(dim)).all() - # - # cov = np.random.randn(dim) ** 2 - # dist = self.cls(dist_0.mean, cov) - # assert dist.dim == dim - # assert dist.shape == dist_0.mean.shape[:-1] - # assert (dist.mean == dist_0.mean).all() - # assert (dist.cov == np.diag(cov)).all() - @pytest.mark.parametrize("size", sizes) def test_logpdf(self, dim, shape, mean_shape, cov_shape, size): dist = self.random(dim, shape, mean_shape, cov_shape) @@ -154,8 +85,8 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): @pytest.mark.parametrize("p", dims) def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): - if dim <= p: - pytest.skip("dim <= p") + if dim < p: + pytest.skip("dim < p") i = np.random.choice(dim, p, replace=False) dist = self.random(dim, shape, mean_shape, cov_shape) dist_2 = dist.marginalise(i) @@ -169,8 +100,9 @@ def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): @pytest.mark.parametrize("values_shape", shapes) @pytest.mark.parametrize("p", dims) def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): - if dim <= p: - pytest.skip("dim <= p") + if dim < p: + pytest.skip("dim < p") + indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape, p) dist = self.random(dim, shape, mean_shape, cov_shape) @@ -179,9 +111,12 @@ def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(dist.cov)[:-2], values_shape - ) + if cov_shape == "scalar_cov" or cov_shape == "vector_cov": + assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] + else: + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], np.shape(dist.cov)[:-2], values_shape + ) assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) @@ -207,21 +142,23 @@ class TestMixtureNormal(object): def random(self, dim, shape, logA_shape, mean_shape, cov_shape): logA = np.random.randn(*logA_shape) - if mean_shape is None: + if mean_shape == "scalar_mean": mean = np.random.randn() + mean_shape = shape else: mean = np.random.randn(*mean_shape, dim) - if cov_shape is "scalar_cov": + + if cov_shape == "scalar_cov": cov = np.random.randn() ** 2 cov_shape = shape - elif cov_shape is "vector_cov": + elif cov_shape == "vector_cov": cov = np.random.randn(dim) ** 2 cov_shape = shape else: cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - dist = self.cls(logA, mean, cov, shape) + dist = self.cls(logA, mean, cov, shape, dim) assert dist.dim == dim assert dist.shape == np.broadcast_shapes( @@ -281,8 +218,8 @@ def test_predict( @pytest.mark.parametrize("p", dims) def test_marginalise(self, dim, shape, logA_shape, mean_shape, cov_shape, p): - if dim <= p: - pytest.skip("dim <= p") + if dim < p: + pytest.skip("dim < p") i = np.random.choice(dim, p, replace=False) dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) dist_2 = dist.marginalise(i) @@ -299,8 +236,8 @@ def test_marginalise(self, dim, shape, logA_shape, mean_shape, cov_shape, p): def test_condition( self, dim, shape, 
logA_shape, mean_shape, cov_shape, p, values_shape ): - if dim <= p: - pytest.skip("dim <= p") + if dim < p: + pytest.skip("dim < p") indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape[:-1], p) dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) @@ -309,9 +246,15 @@ def test_condition( assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(dist.cov)[:-2], values_shape[:-1] + (1,) - ) + if cov_shape == "scalar_cov" or cov_shape == "vector_cov": + assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] + else: + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], + np.shape(dist.cov)[:-2], + values_shape[:-1] + (1,), + ) + assert np.shape(dist_2.logA) == dist_2.shape assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) From a7e4304c24b1edd5986887563b829ba87a52c5c6 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 12 Dec 2023 02:26:33 +0000 Subject: [PATCH 020/117] Fixed rvs --- lsbi/stats_1.py | 14 +++++++------- tests/test_stats_1.py | 32 +++++++++++++++++++------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 595b2dd..68110df 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -320,17 +320,17 @@ def rvs(self, size=1): p = np.exp(self.logA - logsumexp(self.logA, axis=-1)[..., None]) p = np.broadcast_to(p, self.shape) i = choice(size, p) + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) + mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) + x = np.random.randn(*size, *self.shape[:-1], self.dim) if len(np.shape(self.cov)) > 1: L = cholesky(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) - mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) - mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) - x = np.random.randn(*size, *self.shape[:-1], self.dim) return mean + np.einsum("...ij,...j->...i", L, x) + elif len(np.shape(self.cov)) == 1: + return mean + np.choose(i[..., None], np.sqrt(self.cov)) * x else: - # TODO Fix this for self.cov and self.logA - x = np.random.randn(*size, *self.shape[:-1], self.dim) return mean + np.sqrt(self.cov) * x def predict(self, A, b=0): @@ -432,8 +432,8 @@ def bijector(self, x, inverse=False): dist = self.marginalise(np.s_[i + 1 :]).condition( np.s_[:-1], theta[..., :i] ) - m = dist.mean[..., 0] - c = dist.cov[..., 0, 0] + m = np.atleast_1d(dist.mean)[..., 0] + c = np.atleast_2d(dist.cov)[..., 0, 0] A = np.exp(dist.logA - logsumexp(dist.logA, axis=-1)[..., None]) m = np.broadcast_to(m, dist.shape) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 257c64e..d4af76d 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -5,26 +5,26 @@ shapes = [(2, 3, 4), (3, 4), (4,), ()] sizes = [(8, 7, 6), (7, 6), (6,), ()] -dims = [1, 2, 5] +dims = [0, 1, 2, 5] @pytest.mark.parametrize("dim", dims) @pytest.mark.parametrize("shape", shapes) -@pytest.mark.parametrize("mean_shape", shapes + ["scalar_mean"]) -@pytest.mark.parametrize("cov_shape", shapes + ["scalar_cov", "vector_cov"]) +@pytest.mark.parametrize("mean_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("cov_shape", shapes + ["scalar", "vector"]) class TestMultivariateNormal(object): cls = 
multivariate_normal def random(self, dim, shape, mean_shape, cov_shape): - if mean_shape == "scalar_mean": + if mean_shape == "scalar": mean = np.random.randn() mean_shape = shape else: mean = np.random.randn(*mean_shape, dim) - if cov_shape == "scalar_cov": + if cov_shape == "scalar": cov = np.random.randn() ** 2 cov_shape = shape - elif cov_shape == "vector_cov": + elif cov_shape == "vector": cov = np.random.randn(dim) ** 2 cov_shape = shape else: @@ -111,7 +111,7 @@ def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] - if cov_shape == "scalar_cov" or cov_shape == "vector_cov": + if cov_shape == "scalar" or cov_shape == "vector": assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] else: assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( @@ -134,24 +134,24 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, x_shape): @pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("logA_shape", shapes) -@pytest.mark.parametrize("mean_shape", shapes + ["scalar_mean"]) -@pytest.mark.parametrize("cov_shape", shapes + ["scalar_cov", "vector_cov"]) +@pytest.mark.parametrize("mean_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("cov_shape", shapes + ["scalar", "vector"]) @pytest.mark.parametrize("dim", dims) class TestMixtureNormal(object): cls = mixture_normal def random(self, dim, shape, logA_shape, mean_shape, cov_shape): logA = np.random.randn(*logA_shape) - if mean_shape == "scalar_mean": + if mean_shape == "scalar": mean = np.random.randn() mean_shape = shape else: mean = np.random.randn(*mean_shape, dim) - if cov_shape == "scalar_cov": + if cov_shape == "scalar": cov = np.random.randn() ** 2 cov_shape = shape - elif cov_shape == "vector_cov": + elif cov_shape == "vector": cov = np.random.randn(dim) ** 2 cov_shape = shape else: @@ -178,6 +178,12 @@ def test_logpdf(self, dim, shape, logA_shape, mean_shape, cov_shape, size): @pytest.mark.parametrize("size", sizes) def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, size): + dim = 5 + shape = (2, 3, 4) + logA_shape = (2, 3, 4) + mean_shape = (2, 3, 4) + cov_shape = "vector" + size = (8, 7, 6) dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) x = dist.rvs(size) assert x.shape == size + dist.shape[:-1] + (dim,) @@ -246,7 +252,7 @@ def test_condition( assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] - if cov_shape == "scalar_cov" or cov_shape == "vector_cov": + if cov_shape == "scalar" or cov_shape == "vector": assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] else: assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( From a080991857db438d904b7ba2750b352e19fb0b90 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 12 Dec 2023 03:49:17 +0000 Subject: [PATCH 021/117] A and b now lazily evaluating --- lsbi/stats_1.py | 22 ++++++++---- tests/test_stats_1.py | 79 ++++++++++++++++++++++++++++--------------- 2 files changed, 68 insertions(+), 33 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 68110df..2f501e9 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -103,7 +103,7 @@ def rvs(self, size=1): else: return self.mean + np.sqrt(self.cov) * x - def predict(self, A, b=0): + def predict(self, A=1, b=0): """Predict the mean and 
covariance of a linear transformation. if: x ~ N(mu, Sigma) @@ -122,12 +122,22 @@ def predict(self, A, b=0): ------- multivariate_normal shape (..., k) """ - mean = np.einsum("...qn,...n->...q", A, np.atleast_1d(self.mean)) + b - if len(np.shape(self.cov)) > 1: - cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) + if len(np.shape(A)) > 1: + mean = np.einsum("...qn,...n->...q", A, np.atleast_1d(self.mean)) + b + if len(np.shape(self.cov)) > 1: + cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) + else: + cov = np.einsum("...qn,...pn->...qp", A, A * self.cov) else: - cov = np.einsum("...qn,...pn->...qp", A, A * self.cov) - return multivariate_normal(mean, cov, self.shape, A.shape[-2]) + mean = A * self.mean + b + if len(np.shape(A)) == 1: + cov = A[:, None] * self.cov * A + else: + cov = A * self.cov * A + dim = np.max([*np.shape(mean)[-1:], *np.shape(cov)[-2:], -1]) + if dim == -1: + dim = self.dim + return multivariate_normal(mean, cov, self.shape, dim) def marginalise(self, indices): """Marginalise over indices. diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index d4af76d..d8cf1b8 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -5,7 +5,7 @@ shapes = [(2, 3, 4), (3, 4), (4,), ()] sizes = [(8, 7, 6), (7, 6), (6,), ()] -dims = [0, 1, 2, 5] +dims = [1, 5] @pytest.mark.parametrize("dim", dims) @@ -51,35 +51,54 @@ def test_rvs(self, dim, shape, mean_shape, cov_shape, size): x = dist.rvs(size) assert x.shape == size + dist.shape + (dim,) - @pytest.mark.parametrize("A_shape", shapes) - @pytest.mark.parametrize("b_shape", shapes) + @pytest.mark.parametrize("A_shape", shapes + ["vector", "scalar"]) + @pytest.mark.parametrize("b_shape", shapes + ["scalar"]) @pytest.mark.parametrize("k", dims) def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): + dim = 2 + shape = (2, 3, 4) + mean_shape = (4,) + cov_shape = (2, 3, 4) + k = 1 + A_shape = (2, 3, 4) + b_shape = (2, 3, 4) dist = self.random(dim, shape, mean_shape, cov_shape) - A = np.random.randn(*A_shape, k, dim) - b = np.random.randn(*b_shape, k) + if A_shape == "scalar": + A = np.random.randn() + A_shape = shape + elif A_shape == "vector": + A = np.random.randn(dim) + A_shape = shape + else: + A = np.random.randn(*A_shape, k, dim) + + if b_shape == "scalar": + b = np.random.randn() + b_shape = shape + else: + b = np.random.randn(*b_shape, k) dist_2 = dist.predict(A, b) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes( - dist.shape, A.shape[:-2], b.shape[:-1] + dist.shape, np.shape(A)[:-2], np.shape(b)[:-1] ) assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( - np.shape(dist.cov)[:-2], A.shape[:-2] + np.shape(dist.cov)[:-2], np.shape(A)[:-2] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], A.shape[:-2], b.shape[:-1] + np.shape(dist.mean)[:-1], np.shape(A)[:-2], np.shape(b)[:-1] ) assert dist_2.dim == k dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2]) + assert dist_2.shape == np.broadcast_shapes(dist.shape, np.shape(A)[:-2]) assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( - np.shape(dist.cov)[:-2], A.shape[:-2] + np.shape(dist.cov)[:-2], np.shape(A)[:-2] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], A.shape[:-2] + np.shape(dist.mean)[:-1], np.shape(A)[:-2] ) assert dist_2.dim == k @@ -178,47 +197,53 @@ def test_logpdf(self, dim, shape, logA_shape, mean_shape, cov_shape, 
size): @pytest.mark.parametrize("size", sizes) def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, size): - dim = 5 - shape = (2, 3, 4) - logA_shape = (2, 3, 4) - mean_shape = (2, 3, 4) - cov_shape = "vector" - size = (8, 7, 6) dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) x = dist.rvs(size) assert x.shape == size + dist.shape[:-1] + (dim,) - @pytest.mark.parametrize("A_shape", shapes) - @pytest.mark.parametrize("b_shape", shapes) + @pytest.mark.parametrize("A_shape", shapes + ["vector", "scalar"]) + @pytest.mark.parametrize("b_shape", shapes + ["scalar"]) @pytest.mark.parametrize("k", dims) def test_predict( self, dim, shape, logA_shape, mean_shape, cov_shape, k, A_shape, b_shape ): dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) - A = np.random.randn(*A_shape[:-1], k, dim) - b = np.random.randn(*b_shape[:-1], k) + if A_shape == "scalar": + A = np.random.randn() + A_shape = shape + elif A_shape == "vector": + A = np.random.randn(dim) + A_shape = shape + else: + A = np.random.randn(*A_shape[:-1], k, dim) + + if b_shape == "scalar": + b = np.random.randn() + b_shape = shape + else: + b = np.random.randn(*b_shape[:-1], k) dist_2 = dist.predict(A, b) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes( - dist.shape, A.shape[:-2] + (1,), b.shape[:-1] + (1,) + dist.shape, np.shape(A)[:-2] + (1,), np.shape(b)[:-1] + (1,) ) assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( - np.shape(dist.cov)[:-2], A.shape[:-2] + (1,) + np.shape(dist.cov)[:-2], np.shape(A)[:-2] + (1,) ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], A.shape[:-2] + (1,), b.shape[:-1] + (1,) + np.shape(dist.mean)[:-1], np.shape(A)[:-2] + (1,), np.shape(b)[:-1] + (1,) ) assert dist_2.dim == k dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes(dist.shape, A.shape[:-2] + (1,)) + assert dist_2.shape == np.broadcast_shapes(dist.shape, np.shape(A)[:-2] + (1,)) assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( - np.shape(dist.cov)[:-2], A.shape[:-2] + (1,) + np.shape(dist.cov)[:-2], np.shape(A)[:-2] + (1,) ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], A.shape[:-2] + (1,) + np.shape(dist.mean)[:-1], np.shape(A)[:-2] + (1,) ) assert dist_2.dim == k From eaae7d12162d7c4893995dd7b76a4eb05d19e6fd Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 12 Dec 2023 16:57:37 +0000 Subject: [PATCH 022/117] Lazy A and b now working --- lsbi/stats_1.py | 14 +++++++------ tests/test_stats_1.py | 47 ++++++++++++++++++++++++------------------- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 2f501e9..3a3a90e 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -134,7 +134,7 @@ def predict(self, A=1, b=0): cov = A[:, None] * self.cov * A else: cov = A * self.cov * A - dim = np.max([*np.shape(mean)[-1:], *np.shape(cov)[-2:], -1]) + dim = np.max([*np.shape(A)[-2:-1], *np.shape(b)[-1:], -1]) if dim == -1: dim = self.dim return multivariate_normal(mean, cov, self.shape, dim) @@ -343,7 +343,7 @@ def rvs(self, size=1): else: return mean + np.sqrt(self.cov) * x - def predict(self, A, b=0): + def predict(self, A=1, b=0): """Predict the mean and covariance of a linear transformation. 
if: x ~ mixN(mu, Sigma, logA) @@ -362,10 +362,12 @@ def predict(self, A, b=0): ------- mixture_normal shape (..., k) """ - dist = super().predict( - np.expand_dims(np.atleast_2d(A), axis=-3), - np.expand_dims(np.atleast_1d(b), axis=-2), - ) + # TODO this is not what we want + if len(np.shape(A)) > 1: + A = np.expand_dims(A, axis=-3) + if len(np.shape(b)) > 0: + b = np.expand_dims(b, axis=-2) + dist = super().predict(A, b) return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) def marginalise(self, indices): diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index d8cf1b8..feadc9b 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -5,7 +5,7 @@ shapes = [(2, 3, 4), (3, 4), (4,), ()] sizes = [(8, 7, 6), (7, 6), (6,), ()] -dims = [1, 5] +dims = [1, 2, 5] @pytest.mark.parametrize("dim", dims) @@ -55,14 +55,19 @@ def test_rvs(self, dim, shape, mean_shape, cov_shape, size): @pytest.mark.parametrize("b_shape", shapes + ["scalar"]) @pytest.mark.parametrize("k", dims) def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): - dim = 2 - shape = (2, 3, 4) - mean_shape = (4,) - cov_shape = (2, 3, 4) - k = 1 - A_shape = (2, 3, 4) - b_shape = (2, 3, 4) + if (A_shape == "vector" or A_shape == "scalar") and ( + b_shape != "scalar" or k != dim + ): + pytest.skip("Non broadcastable A and b") + dist = self.random(dim, shape, mean_shape, cov_shape) + + if b_shape == "scalar": + b = np.random.randn() + b_shape = shape + else: + b = np.random.randn(*b_shape, k) + if A_shape == "scalar": A = np.random.randn() A_shape = shape @@ -72,12 +77,6 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): else: A = np.random.randn(*A_shape, k, dim) - if b_shape == "scalar": - b = np.random.randn() - b_shape = shape - else: - b = np.random.randn(*b_shape, k) - dist_2 = dist.predict(A, b) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes( @@ -151,11 +150,11 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, x_shape): assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) +@pytest.mark.parametrize("dim", dims) @pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("logA_shape", shapes) @pytest.mark.parametrize("mean_shape", shapes + ["scalar"]) @pytest.mark.parametrize("cov_shape", shapes + ["scalar", "vector"]) -@pytest.mark.parametrize("dim", dims) class TestMixtureNormal(object): cls = mixture_normal @@ -207,7 +206,19 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, size): def test_predict( self, dim, shape, logA_shape, mean_shape, cov_shape, k, A_shape, b_shape ): + if (A_shape == "vector" or A_shape == "scalar") and ( + b_shape != "scalar" or k != dim + ): + pytest.skip("Non broadcastable A and b") + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + + if b_shape == "scalar": + b = np.random.randn() + b_shape = shape + else: + b = np.random.randn(*b_shape[:-1], k) + if A_shape == "scalar": A = np.random.randn() A_shape = shape @@ -217,12 +228,6 @@ def test_predict( else: A = np.random.randn(*A_shape[:-1], k, dim) - if b_shape == "scalar": - b = np.random.randn() - b_shape = shape - else: - b = np.random.randn(*b_shape[:-1], k) - dist_2 = dist.predict(A, b) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes( From 403e48cb65ba76736c4cad4fad9c5a7253335122 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 17 Dec 2023 23:01:56 +0000 Subject: [PATCH 023/117] Tests passing --- lsbi/stats_1.py | 3 +-- 
tests/test_stats_1.py | 49 +++++++++++++++++++++---------------------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 3a3a90e..db7308d 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -338,9 +338,8 @@ def rvs(self, size=1): L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) return mean + np.einsum("...ij,...j->...i", L, x) - elif len(np.shape(self.cov)) == 1: - return mean + np.choose(i[..., None], np.sqrt(self.cov)) * x else: + # Do we want the ability to broadcast scalars over mixtures? return mean + np.sqrt(self.cov) * x def predict(self, A=1, b=0): diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index feadc9b..465a714 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -18,22 +18,21 @@ class TestMultivariateNormal(object): def random(self, dim, shape, mean_shape, cov_shape): if mean_shape == "scalar": mean = np.random.randn() - mean_shape = shape else: mean = np.random.randn(*mean_shape, dim) if cov_shape == "scalar": cov = np.random.randn() ** 2 - cov_shape = shape elif cov_shape == "vector": cov = np.random.randn(dim) ** 2 - cov_shape = shape else: cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) dist = self.cls(mean, cov, shape, dim) assert dist.dim == dim - assert dist.shape == np.broadcast_shapes(shape, mean_shape, cov_shape) + assert dist.shape == np.broadcast_shapes( + shape, np.shape(np.atleast_1d(mean))[:-1], np.shape(np.atleast_2d(cov))[:-2] + ) assert np.all(dist.mean == mean) assert np.all(dist.cov == cov) return dist @@ -64,16 +63,13 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): if b_shape == "scalar": b = np.random.randn() - b_shape = shape else: b = np.random.randn(*b_shape, k) if A_shape == "scalar": A = np.random.randn() - A_shape = shape elif A_shape == "vector": A = np.random.randn(dim) - A_shape = shape else: A = np.random.randn(*A_shape, k, dim) @@ -162,16 +158,13 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape): logA = np.random.randn(*logA_shape) if mean_shape == "scalar": mean = np.random.randn() - mean_shape = shape else: mean = np.random.randn(*mean_shape, dim) if cov_shape == "scalar": cov = np.random.randn() ** 2 - cov_shape = shape elif cov_shape == "vector": cov = np.random.randn(dim) ** 2 - cov_shape = shape else: cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) @@ -180,7 +173,10 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape): assert dist.dim == dim assert dist.shape == np.broadcast_shapes( - shape, logA_shape, mean_shape, cov_shape + shape, + logA_shape, + np.shape(np.atleast_1d(mean))[:-1], + np.shape(np.atleast_2d(cov))[:-2], ) assert np.all(dist.logA == logA) assert np.all(dist.mean == mean) @@ -215,40 +211,43 @@ def test_predict( if b_shape == "scalar": b = np.random.randn() - b_shape = shape else: b = np.random.randn(*b_shape[:-1], k) if A_shape == "scalar": A = np.random.randn() - A_shape = shape elif A_shape == "vector": A = np.random.randn(dim) - A_shape = shape else: A = np.random.randn(*A_shape[:-1], k, dim) dist_2 = dist.predict(A, b) assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes( - dist.shape, np.shape(A)[:-2] + (1,), np.shape(b)[:-1] + (1,) + assert dist_2.shape[:-1] == np.broadcast_shapes( + dist.shape[:-1], + np.shape(np.atleast_2d(A))[:-2], + 
np.shape(np.atleast_1d(b))[:-1], ) - assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( - np.shape(dist.cov)[:-2], np.shape(A)[:-2] + (1,) + assert np.shape(dist_2.cov)[:-3] == np.broadcast_shapes( + np.shape(dist.cov)[:-3], np.shape(np.atleast_2d(A))[:-2] ) - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(A)[:-2] + (1,), np.shape(b)[:-1] + (1,) + assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( + np.shape(dist.mean)[:-2], + np.shape(np.atleast_2d(A))[:-2], + np.shape(np.atleast_1d(b))[:-1], ) assert dist_2.dim == k dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes(dist.shape, np.shape(A)[:-2] + (1,)) - assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( - np.shape(dist.cov)[:-2], np.shape(A)[:-2] + (1,) + assert dist_2.shape[:-1] == np.broadcast_shapes( + dist.shape[:-1], np.shape(np.atleast_2d(A))[:-2] ) - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(A)[:-2] + (1,) + assert np.shape(dist_2.cov)[:-3] == np.broadcast_shapes( + np.shape(dist.cov)[:-3], np.shape(np.atleast_2d(A))[:-2] + ) + assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( + np.shape(dist.mean)[:-2], np.shape(np.atleast_2d(A))[:-2] ) assert dist_2.dim == k From e86652a83547bab791f37f333574ad82062cb0f8 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Mon, 18 Dec 2023 17:58:08 +0000 Subject: [PATCH 024/117] re-graded to not bother with checking for scalar means and covariances --- lsbi/stats_1.py | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index db7308d..b2b8b88 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -130,8 +130,12 @@ def predict(self, A=1, b=0): cov = np.einsum("...qn,...pn->...qp", A, A * self.cov) else: mean = A * self.mean + b - if len(np.shape(A)) == 1: - cov = A[:, None] * self.cov * A + if len(np.shape(self.cov)) > 1: + cov = ( + self.cov + * np.atleast_1d(A)[..., None] + * np.atleast_1d(A)[..., None, :] + ) else: cov = A * self.cov * A dim = np.max([*np.shape(A)[-2:-1], *np.shape(b)[-1:], -1]) @@ -152,17 +156,12 @@ def marginalise(self, indices): multivariate_normal shape (*shape, dim - len(indices)) """ i = self._bar(indices) - if len(np.shape(self.mean)) > 0: - mean = self.mean[..., i] - else: - mean = self.mean + mean = (np.ones(self.dim) * self.mean)[..., i] if len(np.shape(self.cov)) > 1: cov = self.cov[..., i, :][..., i] - elif len(np.shape(self.cov)) == 1: - cov = self.cov[i] else: - cov = self.cov + cov = (np.ones(self.dim) * self.cov)[i] return multivariate_normal(mean, cov, self.shape, sum(i)) @@ -184,20 +183,14 @@ def condition(self, indices, values): """ i = self._bar(indices) k = indices - - if len(np.shape(self.mean)) > 0: - mean_i = self.mean[..., i] - mean_k = self.mean[..., k] - else: - mean_i = self.mean - mean_k = self.mean + mean = (np.ones(self.dim) * self.mean)[..., i] if len(np.shape(self.cov)) > 1: - mean = mean_i + np.einsum( + mean = mean + np.einsum( "...ja,...ab,...b->...j", self.cov[..., i, :][..., :, k], inv(self.cov[..., k, :][..., :, k]), - values - mean_k, + values - (np.ones(self.dim) * self.mean)[..., k], ) cov = self.cov[..., i, :][..., :, i] - np.einsum( "...ja,...ab,...bk->...jk", @@ -205,15 +198,12 @@ def condition(self, indices, values): inv(self.cov[..., k, :][..., :, k]), self.cov[..., k, :][..., :, i], ) - return multivariate_normal(mean, cov, self.shape, sum(i)) + shape = self.shape 
else: - mean = mean_i - if len(np.shape(self.cov)) == 1: - cov = self.cov[i] - else: - cov = self.cov + cov = (np.ones(self.dim) * self.cov)[i] shape = np.broadcast_shapes(self.shape, values.shape[:-1]) - return multivariate_normal(mean, cov, shape, sum(i)) + + return multivariate_normal(mean, cov, shape, sum(i)) def _bar(self, indices): """Return the indices not in the given indices.""" @@ -339,7 +329,6 @@ def rvs(self, size=1): L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) return mean + np.einsum("...ij,...j->...i", L, x) else: - # Do we want the ability to broadcast scalars over mixtures? return mean + np.sqrt(self.cov) * x def predict(self, A=1, b=0): @@ -361,7 +350,6 @@ def predict(self, A=1, b=0): ------- mixture_normal shape (..., k) """ - # TODO this is not what we want if len(np.shape(A)) > 1: A = np.expand_dims(A, axis=-3) if len(np.shape(b)) > 0: From 735ad0d15e21e643564fd00384130a2e850ed2d5 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 26 Dec 2023 16:27:25 +0000 Subject: [PATCH 025/117] Minor update to stats --- lsbi/stats_1.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index b2b8b88..9060026 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -1,4 +1,6 @@ """Extensions to scipy.stats functions.""" +from copy import deepcopy + import numpy as np import scipy.stats from numpy.linalg import cholesky, inv @@ -83,12 +85,12 @@ def logpdf(self, x): norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum() / 2 return norm - chi2 / 2 - def rvs(self, size=1): + def rvs(self, size=()): """Draw random samples from the distribution. Parameters ---------- - size : int or tuple of ints, optional, default=1 + size : int or tuple of ints, optional, default=() Number of samples to draw. Returns @@ -271,7 +273,7 @@ class mixture_normal(multivariate_normal): def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0): self.logA = logA - super().__init__(mean, cov, shape, dim) + super().__init__(mean=mean, cov=cov, shape=shape, dim=dim) @property def shape(self): @@ -303,7 +305,7 @@ def logpdf(self, x): logA = self.logA - logsumexp(self.logA, axis=-1)[..., None] return logsumexp(logpdf + logA, axis=-1) - def rvs(self, size=1): + def rvs(self, size=()): """Draw random samples from the distribution. Parameters @@ -315,7 +317,7 @@ def rvs(self, size=1): rvs : array_like, shape (*size, *shape[:-1], dim) """ if self.shape == (): - return super().rvs(size) + return super().rvs(size=size) size = np.atleast_1d(np.array(size, dtype=int)) p = np.exp(self.logA - logsumexp(self.logA, axis=-1)[..., None]) p = np.broadcast_to(p, self.shape) @@ -389,12 +391,29 @@ def condition(self, indices, values): mixture_normal shape (*shape, len(indices)) """ dist = super().condition(indices, values[..., None, :]) - marginal = self.marginalise(self._bar(indices)) - marginal.mean = marginal.mean - values[..., None, :] - logA = super(marginal.__class__, marginal).logpdf(np.zeros(marginal.dim)) + logA = self.marginalise(self._bar(indices)).weights(values) + return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) + + def weights(self, values): + """Compute the conditional weights of the mixture. + + Parameters + ---------- + values : array_like shape (..., dim) + Values to condition on. + + where self.shape[:-1] is broadcastable to ... 
+ + Returns + ------- + weights : array_like shape (*shape, n) + """ + copy = deepcopy(self) + copy.mean = copy.mean - values[..., None, :] + logA = super(copy.__class__, copy).logpdf(np.zeros(copy.dim)) logA -= logsumexp(logA, axis=-1)[..., None] logA += self.logA - return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) + return logA def bijector(self, x, inverse=False): """Bijector between U([0, 1])^d and the distribution. From a6895f07d1879442c4d0c94e1eb58dad4772fd9e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 27 Dec 2023 23:34:36 +0000 Subject: [PATCH 026/117] First draft of model --- lsbi/model_1.py | 572 ++++++++++++++++++++++++++++++++++++++++++ tests/test_model_1.py | 85 +++++++ tests/test_stats_1.py | 6 +- 3 files changed, 660 insertions(+), 3 deletions(-) create mode 100644 lsbi/model_1.py create mode 100644 tests/test_model_1.py diff --git a/lsbi/model_1.py b/lsbi/model_1.py new file mode 100644 index 0000000..33d3e5b --- /dev/null +++ b/lsbi/model_1.py @@ -0,0 +1,572 @@ +"""Gaussian models for linear Bayesian inference.""" +import numpy as np +from numpy.linalg import inv, solve + +from lsbi.stats_1 import mixture_normal, multivariate_normal +from lsbi.utils import logdet + + +class LinearModel(object): + """A multilinear model. + + D|theta ~ N( m + M theta, C ) + theta ~ N( mu, Sigma ) + + Defined by: + Parameters: theta (..., n,) + Data: D (..., d,) + Model: M (..., d, n) + Prior mean: mu (..., n,) + Prior covariance: Sigma (..., n, n) + Data mean: m (..., d,) + Data covariance: C (..., d, d) + + where the ellipses indicate arbitrary (broadcastable) additional copies. + + Parameters + ---------- + M : array_like, optional + if ndim>=2: model matrices + if ndim==1: model matrix with vector diagonal for all components + if ndim==0: scalar * rectangular identity matrix for all components + Defaults to rectangular identity matrix + m : array_like, optional + if ndim>=1: data means + if ndim==0: scalar * unit vector for all components + Defaults to 0 for all components + C : array_like, optional + if ndim>=2: data covariances + if ndim==1: data covariance with vector diagonal for all components + if ndim==0: scalar * identity matrix for all components + Defaults to rectangular identity matrix + mu : array_like, optional + if ndim>=1: prior means + if ndim==0: scalar * unit vector for all components + Defaults to 0 for all components + Prior mean, defaults to zero vector + Sigma : array_like, optional + if ndim>=2: prior covariances + if ndim==1: prior covariance with vector diagonal for all components + if ndim==0: scalar * identity matrix for all components + Defaults to k copies of identity matrices + n : int, optional + Number of parameters, defaults to automatically inferred value + d : int, optional + Number of data dimensions, defaults to automatically inferred value + shape : (), optional + Number of mixture components, defaults to automatically inferred value + """ + + def __init__(self, M=1, m=0, C=1, mu=0, Sigma=1, shape=(), n=1, d=1): + self.M = M + self.m = m + self.C = C + self.mu = mu + self.Sigma = Sigma + self._shape = shape + self._n = n + self._d = d + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + np.atleast_2d(self.M).shape[:-2], + np.atleast_1d(self.m).shape[:-1], + np.atleast_2d(self.C).shape[:-2], + np.atleast_1d(self.mu).shape[:-1], + np.atleast_2d(self.Sigma).shape[:-2], + self._shape, + ) + + @property + def n(self): + """Dimension of the distribution.""" + return np.max( + [ + 
*np.shape(self.M)[-1:], + *np.shape(self.Sigma)[-2:], + *np.shape(self.mu)[-1:], + self._n, + ] + ) + + @property + def d(self): + """Dimensionality of data space len(D).""" + return np.max( + [ + *np.shape(self.M)[-2:-1], + *np.shape(self.C)[-2:], + *np.shape(self.m)[-1:], + self._d, + ] + ) + + @classmethod + def from_joint(cls, means, covs, n): + """Construct model from joint distribution.""" + mu = means[:, -n:] + Sigma = covs[:, -n:, -n:] + M = solve(Sigma, covs[:, -n:, :-n]).transpose(0, 2, 1) + m = means[:, :-n] - np.einsum("ija,ia->ij", M, mu) + C = covs[:, :-n, :-n] - np.einsum("ija,iab,ikb->ijk", M, Sigma, M) + return cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma) + + def likelihood(self, theta): + """P(D|theta) as a scipy distribution object. + + D|theta ~ N( m + M theta, C ) + theta ~ N( mu, Sigma ) + + Parameters + ---------- + theta : array_like, shape (k, n) + """ + # if len(np.shape(self.M)) > 1: + # M = self.M + # else: + # M = self.M * np.eye(self.d, self.n) + if len(np.shape(self.M)) > 1: + mu = self.m + np.einsum("...ja,...a->...j", self.M, theta) + else: + mu = self.m + self.M * theta + return multivariate_normal(mu, self.C, self.shape, self.d) + + def prior(self): + """P(theta) as a scipy distribution object. + + theta ~ N( mu, Sigma ) + """ + return multivariate_normal(self.mu, self.Sigma, self.shape, self.n) + + def posterior(self, D): + """P(theta|D) as a scipy distribution object. + + theta|D ~ N( mu + S M'C^{-1}(D - m - M mu), S ) + S = (Sigma^{-1} + M'C^{-1}M)^{-1} + + Parameters + ---------- + D : array_like, shape (d,) + """ + if len(np.shape(self.M)) > 1: + values = D - self.m - np.einsum("...ja,...a->...j", self.M, self.mu) + + if len(self.shape(self.C)) > 1: + MinvCM = np.einsum( + "...ja,...ab,...kb->...jk", self.M, inv(self.C), self.M + ) + else: + MinvCM = np.einsum( + "...ja,...kb->...jk", self.M, self.M / np.array(self.C)[:, None] + ) + + if len(np.shape(self.Sigma)) > 1: + Sigma = inv(inv(self.Sigma) + MinvCM) + else: + Sigma = inv(np.eye(self.d) / self.Sigma + MinvCM) + + if len(np.shape(self.C)) > 1: + mu = self.mu + np.einsum( + "...ja,...ba,...bc,...c->...j", Sigma, self.M, inv(self.C), values + ) + else: + mu = self.mu + np.einsum( + "...ja,...ac,...c->...j", + Sigma, + self.M / np.array(self.C)[:, None], + values, + ) + else: + values = D * np.ones(self.d) + values[: self.n] = values[: self.n] - self.m - self.M * self.mu + + if len(self.shape(self.C)) > 1: + MinvCM = ( + np.atleast_1d(self.M)[..., None] + * inv(self.C) + * np.atleast_1d(self.M)[..., None, :] + ) + if len(np.shape(self.Sigma)) > 1: + Sigma = inv(inv(self.Sigma) + MinvCM) + else: + Sigma = inv(np.eye(self.d) / self.Sigma + MinvCM) + + mu = self.mu + np.einsum( + "...ja,...ba,...bc,...c->...j", Sigma, self.M, inv(self.C), values + ) + else: + MinvCM = self.M / np.atleast_1d(self.C)[: self.n] * self.M + if len(np.shape(self.Sigma)) > 1: + Sigma = inv(inv(self.Sigma) + np.eye(self.n) * MinvCM) + else: + Sigma = 1 / (1 / self.Sigma + MinvCM) + + mu = self.mu + np.einsum( + "...ja,...ac,...c->...j", + Sigma, + self.M / np.atleast_1d(self.C)[: self.n], + values, + ) + + return multivariate_normal(mu, Sigma, self.shape, self.n) + + def evidence(self): + """P(D) as a scipy distribution object. 
+ + D ~ N( m + M mu, C + M Sigma M' ) + """ + if len(np.shape(self.M)) > 1: + mu = self.m + np.einsum("...ja,...a->...j", self.M, self.mu) + + if len(np.shape(self.Sigma)) > 1: + Sigma = np.einsum( + "...ja,...ab,...kb->...jk", self.M, self.Sigma, self.M + ) + else: + Sigma = np.einsum("...ja,...kb->...jk", self.M, self.Sigma * self.M) + if len(np.shape(self.C)) > 1: + Sigma = self.C + Sigma + else: + Sigma = self.C * np.eye(self.d) + Sigma + else: + mu = self.m * np.ones(self.d) + mu[: self.n] = mu[: self.n] + self.M * self.mu + Sigma = self.C + + if len(np.shape(self.Sigma)) > 1 or len(np.shape(self.C)) > 1: + if len(np.shape(self.C)) <= 1: + Sigma = Sigma * np.eye(self.d) + Sigma[: self.n, : self.n] = ( + Sigma[: self.n, : self.n] + + np.atleast_1d(self.M)[..., None] + * self.Sigma + * np.atleast_1d(self.M)[..., None, :] + ) + else: + Sigma = Sigma * np.ones(self.d) + Sigma[: self.n] = Sigma[: self.n] + self.M * self.Sigma * self.M + + return multivariate_normal(mu, Sigma, self.shape, self.d) + + def joint(self): + """P(D, theta) as a scipy distribution object. + + [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) + [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) + """ + evidence = self.evidence() + prior = self.prior() + mu = np.block([evidence.mean * np.ones(self.d), prior.mean * np.ones(self.n)]) + corr = np.einsum( + "...ja,...al->...jl", + np.atleast_2d(self.M) * np.eye(n, d), + np.atleast_2d(self.Sigma) * n.eye(n), + ) + Sigma = np.block( + [ + [np.atleast_2d(evidence.cov) * np.eye(d), corr], + [np.moveaxis(corr, -1, -2), np.atleast_2d(prior.cov) * np.eye(n)], + ] + ) + return multivariate_normal(mu, Sigma, self.shape, len(mu)) + + +class LinearMixtureModel(object): + """A linear mixture model. + + D|theta, A ~ N( m + M theta, C ) + theta|A ~ N( mu, Sigma ) + A ~ categorical( exp(logA) ) + + Defined by: + Parameters: theta (..., n,) + Data: D (..., d,) + Prior means: mu (..., k, n) + Prior covariances: Sigma (..., k, n, n) + Data means: m (..., k, d) + Data covariances: C (..., k, d, d) + log mixture weights: logA (..., k,) + + Parameters + ---------- + M : array_like, optional + if ndim>=2: model matrices + if ndim==1: model matrix with vector diagonal for all components + if scalar: scalar * rectangular identity matrix for all components + Defaults to k copies of rectangular identity matrices + m : array_like, optional + if ndim>=1: data means + if scalar: scalar * unit vector for all components + Defaults to 0 for all components + C : array_like, optional + if ndim>=2: data covariances + if ndim==1: data covariance with vector diagonal for all components + if scalar: scalar * identity matrix for all components + Defaults to k copies of identity matrices + mu : array_like, optional + if ndim>=1: prior means + if scalar: scalar * unit vector for all components + Defaults to 0 for all components + Prior mean, defaults to zero vector + Sigma : array_like, optional + if ndim>=2: prior covariances + if ndim==1: prior covariance with vector diagonal for all components + if scalar: scalar * identity matrix for all components + Defaults to k copies of identity matrices + logA : array_like, optional + if ndim>=1: log mixture weights + if scalar: scalar * unit vector + Defaults to uniform weights + n : int, optional + Number of parameters, defaults to automatically inferred value + d : int, optional + Number of data dimensions, defaults to automatically inferred value + k : int, optional + Number of mixture components, defaults to automatically inferred value + """ + + def __init__(self, 
logA=1, M=1, m=0, C=1, mu=0, Sigma=1, shape=(), n=1, d=1, k=1): + self.logA = logA + super().__init__(M=M, m=m, C=C, mu=mu, Sigma=Sigma, shape=shape, n=n, d=d) + + @classmethod + def from_joint(cls, means, covs, logA, n): + """Construct model from joint distribution.""" + mu = means[:, -n:] + Sigma = covs[:, -n:, -n:] + M = solve(Sigma, covs[:, -n:, :-n]).transpose(0, 2, 1) + m = means[:, :-n] - np.einsum("ija,ia->ij", M, mu) + C = covs[:, :-n, :-n] - np.einsum("ija,iab,ikb->ijk", M, Sigma, M) + return cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma, logA=logA) + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + np.array(self.logA).shape, + np.atleast_2d(self.M).shape[:-2], + np.atleast_1d(self.m).shape[:-1], + np.atleast_2d(self.C).shape[:-2], + np.atleast_1d(self.mu).shape[:-1], + np.atleast_2d(self.Sigma).shape[:-2], + self._shape, + ) + + @property + def k(self): + """Number of mixture components of the distribution.""" + return np.shape[-1] + + def likelihood(self, theta): + """P(D|theta) as a scipy distribution object. + + D|theta,A ~ N( m + M theta, C ) + theta|A ~ N( mu, Sigma ) + A ~ categorical(exp(logA)) + + Parameters + ---------- + theta : array_like, shape (n,) + """ + dist = super().likelihood(theta) + logA = self.prior().weights(theta) + return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) + + def prior(self): + """P(theta) as a scipy distribution object. + + theta|A ~ N( mu, Sigma ) + A ~ categorical(exp(logA)) + """ + dist = super().prior() + return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) + + def posterior(self, D): + """P(theta|D) as a scipy distribution object. + + theta|D, A ~ N( mu + S M'C^{-1}(D - m - M mu), S ) + D|A ~ N( m + M mu, C + M Sigma M' ) + A ~ categorical(exp(logA)) + S = (Sigma^{-1} + M'C^{-1}M)^{-1} + + Parameters + ---------- + D : array_like, shape (d,) + """ + dist = super().posterior(D) + logA = self.evidence().weights(D) + return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) + + def evidence(self): + """P(D) as a scipy distribution object. + + D|A ~ N( m + M mu, C + M Sigma M' ) + A ~ categorical(exp(logA)) + """ + dist = super().evidence() + return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) + + def joint(self): + """P(D, theta) as a scipy distribution object. + + [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) + [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) + + A ~ categorical(exp(logA)) + """ + dist = super().joint() + return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) + + +class ReducedLinearModel(object): + """A model with no data. 
+ + If a Likelihood is Gaussian in the parameters, it is sometimes more + clear/efficient to phrase it in terms of a parameter covariance, parameter + mean and peak value: + + logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) + + We can link this to a data-based model with the relations: + + Sigma_L = (M' C^{-1} M)^{-1} + mu_L = Sigma_L M' C^{-1} (D-m) + logLmax = + - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 + + Parameters + ---------- + mu_L : array_like + Likelihood peak + Sigma_L : array_like + Likelihood covariance + logLmax : float, optional + Likelihood maximum, defaults to zero + mu_pi : array_like, optional + Prior mean, defaults to zero vector + Sigma_pi : array_like, optional + Prior covariance, defaults to identity matrix + """ + + def __init__(self, *args, **kwargs): + self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) + self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L", None)) + self.logLmax = kwargs.pop("logLmax", 0) + self.mu_pi = np.atleast_1d(kwargs.pop("mu_pi", np.zeros_like(self.mu_L))) + self.Sigma_pi = np.atleast_2d(kwargs.pop("Sigma_pi", np.eye(len(self.mu_pi)))) + self.Sigma_P = inv(inv(self.Sigma_pi) + inv(self.Sigma_L)) + self.mu_P = self.Sigma_P @ ( + solve(self.Sigma_pi, self.mu_pi) + solve(self.Sigma_L, self.mu_L) + ) + + def prior(self): + """P(theta) as a scipy distribution object.""" + return multivariate_normal(self.mu_pi, self.Sigma_pi) + + def posterior(self): + """P(theta|D) as a scipy distribution object.""" + return multivariate_normal(self.mu_P, self.Sigma_P) + + def logpi(self, theta): + """P(theta) as a scalar.""" + return self.prior().logpdf(theta) + + def logP(self, theta): + """P(theta|D) as a scalar.""" + return self.posterior().logpdf(theta) + + def logL(self, theta): + """P(D|theta) as a scalar.""" + return ( + self.logLmax + + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(theta) + + logdet(2 * np.pi * self.Sigma_L) / 2 + ) + + def logZ(self): + """P(D) as a scalar.""" + return ( + self.logLmax + + logdet(self.Sigma_P) / 2 + - logdet(self.Sigma_pi) / 2 + - (self.mu_P - self.mu_pi) + @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) + / 2 + - (self.mu_P - self.mu_L) @ solve(self.Sigma_L, self.mu_P - self.mu_L) / 2 + ) + + def DKL(self): + """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" + return ( + logdet(self.Sigma_pi) + - logdet(self.Sigma_P) + + np.trace(inv(self.Sigma_pi) @ self.Sigma_P - 1) + + (self.mu_P - self.mu_pi) @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) + ) / 2 + + +class ReducedLinearModelUniformPrior(object): + """A model with no data. 
+ + Gaussian likelihood in the parameters + + logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) + + Uniform prior + + We can link this to a data-based model with the relations: + + Sigma_L = (M' C^{-1} M)^{-1} + mu_L = Sigma_L M' C^{-1} (D-m) + logLmax = + -log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 + + Parameters + ---------- + mu_L : array_like + Likelihood peak + Sigma_L : array_like + Likelihood covariance + logLmax : float, optional + Likelihood maximum, defaults to zero + logV : float, optional + log prior volume, defaults to zero + """ + + def __init__(self, *args, **kwargs): + self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) + self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L")) + self.logLmax = kwargs.pop("logLmax", 0) + self.logV = kwargs.pop("logV", 0) + self.Sigma_P = self.Sigma_L + self.mu_P = self.mu_L + + def posterior(self): + """P(theta|D) as a scipy distribution object.""" + return multivariate_normal(self.mu_P, self.Sigma_P) + + def logpi(self, theta): + """P(theta) as a scalar.""" + return -self.logV + + def logP(self, theta): + """P(theta|D) as a scalar.""" + return self.posterior().logpdf(theta) + + def logL(self, theta): + """P(D|theta) as a scalar.""" + return ( + self.logLmax + + logdet(2 * np.pi * self.Sigma_L) / 2 + + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(theta) + ) + + def logZ(self): + """P(D) as a scalar.""" + return self.logLmax + logdet(2 * np.pi * self.Sigma_P) / 2 - self.logV + + def DKL(self): + """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" + return self.logV - logdet(2 * np.pi * np.e * self.Sigma_P) / 2 diff --git a/tests/test_model_1.py b/tests/test_model_1.py new file mode 100644 index 0000000..73e87f2 --- /dev/null +++ b/tests/test_model_1.py @@ -0,0 +1,85 @@ +import numpy as np +import pytest + +from lsbi.model_1 import LinearMixtureModel, LinearModel + +shapes = [(2, 3), (3,), ()] +sizes = [(6, 5), (5,), ()] +dims = [1, 2, 4] + + +@pytest.mark.parametrize("d", dims) +@pytest.mark.parametrize("n", dims) +@pytest.mark.parametrize("shape", shapes) +@pytest.mark.parametrize("m_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("mu_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("M_shape", shapes + ["scalar", "vector"]) +@pytest.mark.parametrize("C_shape", shapes + ["scalar", "vector"]) +@pytest.mark.parametrize("Sigma_shape", shapes + ["scalar", "vector"]) +class TestLinearModel(object): + cls = LinearModel + + def random(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + if M_shape == "scalar": + M = np.random.randn() + elif M_shape == "vector": + M = np.random.randn(n) + else: + M = np.random.randn(*M_shape, d, n) + + if m_shape == "scalar": + m = np.random.randn() + else: + m = np.random.randn(*m_shape, d) + + if C_shape == "scalar": + C = np.random.randn() ** 2 + elif C_shape == "vector": + C = np.random.randn(d) ** 2 + else: + C = np.random.randn(*C_shape, d, d) + C = np.einsum("...ij,...kj->...ik", C, C) + d * np.eye(d) + + if mu_shape == "scalar": + mu = np.random.randn() + else: + mu = np.random.randn(*mu_shape, n) + + if Sigma_shape == "scalar": + Sigma = np.random.randn() ** 2 + elif Sigma_shape == "vector": + Sigma = np.random.randn(n) ** 2 + else: + Sigma = np.random.randn(*Sigma_shape, n, n) + Sigma = np.einsum("...ij,...kj->...ik", Sigma, Sigma) + n * np.eye(n) + + model = self.cls(M, m, C, mu, Sigma, shape, n, d) + + assert model.d == d + assert model.n == n + model.prior() + assert model.shape == np.broadcast_shapes( + shape, + 
np.shape(np.atleast_2d(M))[:-2], + np.shape(np.atleast_1d(m))[:-1], + np.shape(np.atleast_1d(mu))[:-1], + np.shape(np.atleast_2d(C))[:-2], + np.shape(np.atleast_2d(Sigma))[:-2], + ) + assert np.all(model.M == M) + assert np.all(model.m == m) + assert np.all(model.C == C) + assert np.all(model.mu == mu) + assert np.all(model.Sigma == Sigma) + return model + + @pytest.mark.parametrize("theta_shape", shapes) + def test_likelihood( + self, theta_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ): + model = self.random( + M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + theta = np.random.randn(*theta_shape, n) + dist = model.likelihood(theta) + assert dist.shape == np.broadcast_shapes(shape, theta_shape) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 465a714..ba6604f 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -3,9 +3,9 @@ from lsbi.stats_1 import mixture_normal, multivariate_normal -shapes = [(2, 3, 4), (3, 4), (4,), ()] -sizes = [(8, 7, 6), (7, 6), (6,), ()] -dims = [1, 2, 5] +shapes = [(2, 3), (3,), ()] +sizes = [(6, 5), (5,), ()] +dims = [1, 2, 4] @pytest.mark.parametrize("dim", dims) From e62f52cd6e93eacf326dae2265f68f7073e030c4 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 28 Dec 2023 08:38:37 +0000 Subject: [PATCH 027/117] Begun switch to diagonal_cov strategy --- lsbi/stats_1.py | 121 +++++++++++++++++++++------------- tests/test_stats_1.py | 149 +++++++++++++++++++++++++++--------------- 2 files changed, 174 insertions(+), 96 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 9060026..6372c86 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -31,20 +31,30 @@ class multivariate_normal(object): shape: tuple, optional, default=() Shape of the distribution. Useful for forcing a broadcast beyond that inferred by mean and cov shapes + + dim: int, optional, default=0 + Dimension of the distribution. Useful for forcing a broadcast beyond that + inferred by mean and cov shapes + + diagonal_cov: bool, optional, default=False + If True, cov is interpreted as the diagonal of the covariance matrix. 
""" - def __init__(self, mean=0, cov=1, shape=(), dim=0): + def __init__(self, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): self.mean = mean self.cov = cov self._shape = shape self._dim = dim + self.diagonal_cov = diagonal_cov + if len(np.shape(self.cov)) < 2: + self.diagonal_cov = True @property def shape(self): """Shape of the distribution.""" return np.broadcast_shapes( - np.atleast_1d(self.mean).shape[:-1], - np.atleast_2d(self.cov).shape[:-2], + np.shape(self.mean)[:-1], + np.shape(self.cov)[: -2 + self.diagonal_cov], self._shape, ) @@ -54,7 +64,7 @@ def dim(self): return np.max( [ *np.shape(self.mean)[-1:], - *np.shape(self.cov)[-2:], + *np.shape(self.cov)[-2 + self.diagonal_cov :], self._dim, ] ) @@ -77,12 +87,12 @@ def logpdf(self, x): size = x.shape[:-1] mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean - if len(np.shape(self.cov)) > 1: - chi2 = np.einsum("...j,...jk,...k->...", dx, inv(self.cov), dx) - norm = -logdet(2 * np.pi * self.cov) / 2 - else: + if self.diagonal_cov: chi2 = (dx**2 / self.cov).sum(axis=-1) norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum() / 2 + else: + chi2 = np.einsum("...j,...jk,...k->...", dx, inv(self.cov), dx) + norm = -logdet(2 * np.pi * self.cov) / 2 return norm - chi2 / 2 def rvs(self, size=()): @@ -100,10 +110,10 @@ def rvs(self, size=()): """ size = np.atleast_1d(size) x = np.random.randn(*size, *self.shape, self.dim) - if len(np.shape(self.cov)) > 1: - return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) - else: + if self.diagonal_cov: return self.mean + np.sqrt(self.cov) * x + else: + return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) def predict(self, A=1, b=0): """Predict the mean and covariance of a linear transformation. @@ -124,26 +134,28 @@ def predict(self, A=1, b=0): ------- multivariate_normal shape (..., k) """ + diagonal_cov = self.diagonal_cov if len(np.shape(A)) > 1: - mean = np.einsum("...qn,...n->...q", A, np.atleast_1d(self.mean)) + b - if len(np.shape(self.cov)) > 1: - cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) - else: + mean = np.einsum("...qn,...n->...q", A, self.mean) + b + if self.diagonal_cov: cov = np.einsum("...qn,...pn->...qp", A, A * self.cov) + diagonal_cov = False + else: + cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) else: mean = A * self.mean + b - if len(np.shape(self.cov)) > 1: + if self.diagonal_cov: + cov = A * self.cov * A + else: cov = ( self.cov * np.atleast_1d(A)[..., None] * np.atleast_1d(A)[..., None, :] ) - else: - cov = A * self.cov * A dim = np.max([*np.shape(A)[-2:-1], *np.shape(b)[-1:], -1]) if dim == -1: dim = self.dim - return multivariate_normal(mean, cov, self.shape, dim) + return multivariate_normal(mean, cov, self.shape, dim, diagonal_cov) def marginalise(self, indices): """Marginalise over indices. @@ -160,12 +172,12 @@ def marginalise(self, indices): i = self._bar(indices) mean = (np.ones(self.dim) * self.mean)[..., i] - if len(np.shape(self.cov)) > 1: - cov = self.cov[..., i, :][..., i] - else: + if self.diagonal_cov: cov = (np.ones(self.dim) * self.cov)[i] + else: + cov = self.cov[..., i, :][..., i] - return multivariate_normal(mean, cov, self.shape, sum(i)) + return multivariate_normal(mean, cov, self.shape, sum(i), self.diagonal_cov) def condition(self, indices, values): """Condition on indices with values. 
@@ -187,7 +199,10 @@ def condition(self, indices, values): k = indices mean = (np.ones(self.dim) * self.mean)[..., i] - if len(np.shape(self.cov)) > 1: + if self.diagonal_cov: + cov = (np.ones(self.dim) * self.cov)[i] + shape = np.broadcast_shapes(self.shape, values.shape[:-1]) + else: mean = mean + np.einsum( "...ja,...ab,...b->...j", self.cov[..., i, :][..., :, k], @@ -201,11 +216,8 @@ def condition(self, indices, values): self.cov[..., k, :][..., :, i], ) shape = self.shape - else: - cov = (np.ones(self.dim) * self.cov)[i] - shape = np.broadcast_shapes(self.shape, values.shape[:-1]) - return multivariate_normal(mean, cov, shape, sum(i)) + return multivariate_normal(mean, cov, shape, sum(i), self.diagonal_cov) def _bar(self, indices): """Return the indices not in the given indices.""" @@ -240,18 +252,18 @@ def bijector(self, x, inverse=False): x = np.array(x) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) if inverse: - if len(np.shape(self.cov)) > 1: - y = np.einsum("...jk,...k->...j", inv(cholesky(self.cov)), x - mean) - else: + if self.diagonal_cov: y = (x - mean) / np.sqrt(self.cov) + else: + y = np.einsum("...jk,...k->...j", inv(cholesky(self.cov)), x - mean) return scipy.stats.norm.cdf(y) else: y = scipy.stats.norm.ppf(x) - if len(np.shape(self.cov)) > 1: + if self.diagonal_cov: + return mean + np.sqrt(self.cov) * y + else: L = cholesky(self.cov) return mean + np.einsum("...jk,...k->...j", L, y) - else: - return mean + np.sqrt(self.cov) * y class mixture_normal(multivariate_normal): @@ -269,19 +281,32 @@ class mixture_normal(multivariate_normal): logA: array_like, shape (..., n) Log of the mixing weights. + + shape: tuple, optional, default=() + Shape of the distribution. Useful for forcing a broadcast beyond that + inferred by mean and cov shapes + + dim: int, optional, default=0 + Dimension of the distribution. Useful for forcing a broadcast beyond that + inferred by mean and cov shapes + + diagonal_cov: bool, optional, default=False + If True, cov is interpreted as the diagonal of the covariance matrix. """ - def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0): + def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): self.logA = logA - super().__init__(mean=mean, cov=cov, shape=shape, dim=dim) + super().__init__( + mean=mean, cov=cov, shape=shape, dim=dim, diagonal_cov=diagonal_cov + ) @property def shape(self): """Shape of the distribution.""" return np.broadcast_shapes( - np.array(self.logA).shape, - np.atleast_1d(self.mean).shape[:-1], - np.atleast_2d(self.cov).shape[:-2], + np.shape(self.logA), + np.shape(self.mean)[:-1], + np.shape(self.cov)[: -2 + self.diagonal_cov], self._shape, ) @@ -325,13 +350,13 @@ def rvs(self, size=()): mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) x = np.random.randn(*size, *self.shape[:-1], self.dim) - if len(np.shape(self.cov)) > 1: + if self.diagonal_cov: + return mean + np.sqrt(self.cov) * x + else: L = cholesky(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) return mean + np.einsum("...ij,...j->...i", L, x) - else: - return mean + np.sqrt(self.cov) * x def predict(self, A=1, b=0): """Predict the mean and covariance of a linear transformation. 
@@ -357,7 +382,9 @@ def predict(self, A=1, b=0): if len(np.shape(b)) > 0: b = np.expand_dims(b, axis=-2) dist = super().predict(A, b) - return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) + return mixture_normal( + self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + ) def marginalise(self, indices): """Marginalise over indices. @@ -372,7 +399,9 @@ def marginalise(self, indices): mixture_normal shape (*shape, dim - len(indices)) """ dist = super().marginalise(indices) - return mixture_normal(self.logA, dist.mean, dist.cov, self.shape, dist.dim) + return mixture_normal( + self.logA, dist.mean, dist.cov, self.shape, dist.dim, dist.diagonal_cov + ) def condition(self, indices, values): """Condition on indices with values. @@ -392,7 +421,9 @@ def condition(self, indices, values): """ dist = super().condition(indices, values[..., None, :]) logA = self.marginalise(self._bar(indices)).weights(values) - return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) + return mixture_normal( + logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + ) def weights(self, values): """Compute the conditional weights of the mixture. diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index ba6604f..88749f6 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -11,55 +11,60 @@ @pytest.mark.parametrize("dim", dims) @pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("mean_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("cov_shape", shapes + ["scalar", "vector"]) +@pytest.mark.parametrize("cov_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("diagonal_cov", [True, False]) class TestMultivariateNormal(object): cls = multivariate_normal - def random(self, dim, shape, mean_shape, cov_shape): + def random(self, dim, shape, mean_shape, cov_shape, diagonal_cov): if mean_shape == "scalar": mean = np.random.randn() else: mean = np.random.randn(*mean_shape, dim) + if cov_shape == "scalar": cov = np.random.randn() ** 2 - elif cov_shape == "vector": - cov = np.random.randn(dim) ** 2 + elif diagonal_cov: + cov = np.random.randn(*cov_shape, dim) ** 2 else: cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - dist = self.cls(mean, cov, shape, dim) + + dist = self.cls(mean, cov, shape, dim, diagonal_cov) assert dist.dim == dim assert dist.shape == np.broadcast_shapes( - shape, np.shape(np.atleast_1d(mean))[:-1], np.shape(np.atleast_2d(cov))[:-2] + shape, np.shape(mean)[:-1], np.shape(cov)[: -2 + diagonal_cov] ) assert np.all(dist.mean == mean) assert np.all(dist.cov == cov) return dist @pytest.mark.parametrize("size", sizes) - def test_logpdf(self, dim, shape, mean_shape, cov_shape, size): - dist = self.random(dim, shape, mean_shape, cov_shape) + def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) x = np.random.randn(*size, dim) logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape @pytest.mark.parametrize("size", sizes) - def test_rvs(self, dim, shape, mean_shape, cov_shape, size): - dist = self.random(dim, shape, mean_shape, cov_shape) + def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) x = dist.rvs(size) assert x.shape == size + dist.shape + (dim,) @pytest.mark.parametrize("A_shape", shapes + ["vector", "scalar"]) @pytest.mark.parametrize("b_shape", 
shapes + ["scalar"]) @pytest.mark.parametrize("k", dims) - def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): + def test_predict( + self, dim, shape, mean_shape, cov_shape, diagonal_cov, k, A_shape, b_shape + ): if (A_shape == "vector" or A_shape == "scalar") and ( b_shape != "scalar" or k != dim ): pytest.skip("Non broadcastable A and b") - dist = self.random(dim, shape, mean_shape, cov_shape) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) if b_shape == "scalar": b = np.random.randn() @@ -78,8 +83,8 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): assert dist_2.shape == np.broadcast_shapes( dist.shape, np.shape(A)[:-2], np.shape(b)[:-1] ) - assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( - np.shape(dist.cov)[:-2], np.shape(A)[:-2] + assert np.shape(dist_2.cov)[: -2 + diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[:-2] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], np.shape(A)[:-2], np.shape(b)[:-1] @@ -89,8 +94,8 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, np.shape(A)[:-2]) - assert np.shape(dist_2.cov)[:-2] == np.broadcast_shapes( - np.shape(dist.cov)[:-2], np.shape(A)[:-2] + assert np.shape(dist_2.cov)[: -2 + diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[:-2] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], np.shape(A)[:-2] @@ -98,44 +103,54 @@ def test_predict(self, dim, shape, mean_shape, cov_shape, k, A_shape, b_shape): assert dist_2.dim == k @pytest.mark.parametrize("p", dims) - def test_marginalise(self, dim, shape, mean_shape, cov_shape, p): + def test_marginalise(self, dim, shape, mean_shape, cov_shape, diagonal_cov, p): if dim < p: pytest.skip("dim < p") i = np.random.choice(dim, p, replace=False) - dist = self.random(dim, shape, mean_shape, cov_shape) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) dist_2 = dist.marginalise(i) assert isinstance(dist_2, self.cls) assert dist_2.shape == dist.shape - assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] + assert ( + np.shape(dist_2.cov)[: -2 + diagonal_cov] + == np.shape(dist.cov)[: -2 + diagonal_cov] + ) assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) @pytest.mark.parametrize("p", dims) - def test_condition(self, dim, shape, mean_shape, cov_shape, p, values_shape): + def test_condition( + self, dim, shape, mean_shape, cov_shape, diagonal_cov, p, values_shape + ): if dim < p: pytest.skip("dim < p") indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape, p) - dist = self.random(dim, shape, mean_shape, cov_shape) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) dist_2 = dist.condition(indices, values) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) - assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] + assert ( + np.shape(dist_2.cov)[: -2 + diagonal_cov] + == np.shape(dist.cov)[: -2 + diagonal_cov] + ) if cov_shape == "scalar" or cov_shape == "vector": assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] else: assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - 
np.shape(dist.mean)[:-1], np.shape(dist.cov)[:-2], values_shape + np.shape(dist.mean)[:-1], + np.shape(dist.cov)[: -2 + diagonal_cov], + values_shape, ) assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) - def test_bijector(self, dim, shape, mean_shape, cov_shape, x_shape): - dist = self.random(dim, shape, mean_shape, cov_shape) + def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) x = np.random.rand(*x_shape, dim) y = dist.bijector(x) assert y.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) @@ -150,11 +165,12 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, x_shape): @pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("logA_shape", shapes) @pytest.mark.parametrize("mean_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("cov_shape", shapes + ["scalar", "vector"]) +@pytest.mark.parametrize("cov_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("diagonal_cov", [True, False]) class TestMixtureNormal(object): cls = mixture_normal - def random(self, dim, shape, logA_shape, mean_shape, cov_shape): + def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): logA = np.random.randn(*logA_shape) if mean_shape == "scalar": mean = np.random.randn() @@ -163,20 +179,20 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape): if cov_shape == "scalar": cov = np.random.randn() ** 2 - elif cov_shape == "vector": - cov = np.random.randn(dim) ** 2 + elif diagonal_cov: + cov = np.random.randn(*cov_shape, dim) ** 2 else: cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - dist = self.cls(logA, mean, cov, shape, dim) + dist = self.cls(logA, mean, cov, shape, dim, diagonal_cov) assert dist.dim == dim assert dist.shape == np.broadcast_shapes( shape, logA_shape, - np.shape(np.atleast_1d(mean))[:-1], - np.shape(np.atleast_2d(cov))[:-2], + np.shape(mean)[:-1], + np.shape(cov)[:-2], ) assert np.all(dist.logA == logA) assert np.all(dist.mean == mean) @@ -184,15 +200,19 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape): return dist @pytest.mark.parametrize("size", sizes) - def test_logpdf(self, dim, shape, logA_shape, mean_shape, cov_shape, size): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + def test_logpdf( + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size + ): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) x = np.random.randn(*size, dim) logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape[:-1] @pytest.mark.parametrize("size", sizes) - def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, size): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + def test_rvs( + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size + ): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) x = dist.rvs(size) assert x.shape == size + dist.shape[:-1] + (dim,) @@ -200,14 +220,23 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, size): @pytest.mark.parametrize("b_shape", shapes + ["scalar"]) @pytest.mark.parametrize("k", dims) def test_predict( - self, dim, shape, logA_shape, mean_shape, cov_shape, k, A_shape, b_shape + self, + dim, + shape, + logA_shape, + mean_shape, + cov_shape, + diagonal_cov, + k, + A_shape, + b_shape, ): if (A_shape == "vector" or A_shape == "scalar") 
and ( b_shape != "scalar" or k != dim ): pytest.skip("Non broadcastable A and b") - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) if b_shape == "scalar": b = np.random.randn() @@ -228,8 +257,8 @@ def test_predict( np.shape(np.atleast_2d(A))[:-2], np.shape(np.atleast_1d(b))[:-1], ) - assert np.shape(dist_2.cov)[:-3] == np.broadcast_shapes( - np.shape(dist.cov)[:-3], np.shape(np.atleast_2d(A))[:-2] + assert np.shape(dist_2.cov)[: -3 + diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(np.atleast_2d(A))[:-2] ) assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( np.shape(dist.mean)[:-2], @@ -243,8 +272,8 @@ def test_predict( assert dist_2.shape[:-1] == np.broadcast_shapes( dist.shape[:-1], np.shape(np.atleast_2d(A))[:-2] ) - assert np.shape(dist_2.cov)[:-3] == np.broadcast_shapes( - np.shape(dist.cov)[:-3], np.shape(np.atleast_2d(A))[:-2] + assert np.shape(dist_2.cov)[: -3 + diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(np.atleast_2d(A))[:-2] ) assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( np.shape(dist.mean)[:-2], np.shape(np.atleast_2d(A))[:-2] @@ -252,16 +281,21 @@ def test_predict( assert dist_2.dim == k @pytest.mark.parametrize("p", dims) - def test_marginalise(self, dim, shape, logA_shape, mean_shape, cov_shape, p): + def test_marginalise( + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, p + ): if dim < p: pytest.skip("dim < p") i = np.random.choice(dim, p, replace=False) - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) dist_2 = dist.marginalise(i) assert isinstance(dist_2, self.cls) assert dist_2.shape == dist.shape - assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] + assert ( + np.shape(dist_2.cov)[: -2 + diagonal_cov] + == np.shape(dist.cov)[: -2 + diagonal_cov] + ) assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] assert np.shape(dist_2.logA) == np.shape(dist.logA) assert dist_2.dim == dim - p @@ -269,32 +303,45 @@ def test_marginalise(self, dim, shape, logA_shape, mean_shape, cov_shape, p): @pytest.mark.parametrize("values_shape", shapes) @pytest.mark.parametrize("p", dims) def test_condition( - self, dim, shape, logA_shape, mean_shape, cov_shape, p, values_shape + self, + dim, + shape, + logA_shape, + mean_shape, + cov_shape, + diagonal_cov, + p, + values_shape, ): if dim < p: pytest.skip("dim < p") indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape[:-1], p) - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) dist_2 = dist.condition(indices, values) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) - assert np.shape(dist_2.cov)[:-2] == np.shape(dist.cov)[:-2] + assert ( + np.shape(dist_2.cov)[: -2 + diagonal_cov] + == np.shape(dist.cov)[: -2 + diagonal_cov] + ) if cov_shape == "scalar" or cov_shape == "vector": assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] else: assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], - np.shape(dist.cov)[:-2], + np.shape(dist.cov)[: -2 + diagonal_cov], values_shape[:-1] + (1,), ) assert np.shape(dist_2.logA) == dist_2.shape assert dist_2.dim == dim - p 
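A sketch of the conditioning behaviour these tests exercise, written as user-facing code. It is illustrative rather than part of the patches; argument order follows the mixture_normal signature introduced earlier, and the printed shapes mirror the assertions above.

    import numpy as np
    from lsbi.stats_1 import mixture_normal

    # A 4-component mixture over 3 dimensions with equal weights and a unit
    # diagonal covariance shared by all components.
    logA = np.zeros(4)
    mean = np.random.randn(4, 3)
    mix = mixture_normal(logA, mean, np.ones(3), (), 3, True)

    # Condition dimensions 1 and 2 on observed values; the component weights
    # are re-evaluated from the marginal over the conditioned dimensions.
    post = mix.condition([1, 2], np.array([0.3, -0.1]))
    print(post.dim)          # 1 remaining dimension
    print(post.logA.shape)   # per-component conditional log-weights, (4,)
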
@pytest.mark.parametrize("x_shape", shapes) - def test_bijector(self, dim, shape, logA_shape, mean_shape, cov_shape, x_shape): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape) + def test_bijector( + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, x_shape + ): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) x = np.random.rand(*x_shape[:-1], dim) y = dist.bijector(x) assert y.shape == np.broadcast_shapes(x.shape, dist.shape[:-1] + (dim,)) From 03881eb928e71165a3fdfe4f3728445523cb05d1 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 6 Jan 2024 15:31:02 +0000 Subject: [PATCH 028/117] Removed special treatment of M for now --- lsbi/model_1.py | 24 +++++++++++------------- tests/test_model_1.py | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index 33d3e5b..273464c 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -123,14 +123,12 @@ def likelihood(self, theta): ---------- theta : array_like, shape (k, n) """ - # if len(np.shape(self.M)) > 1: - # M = self.M - # else: - # M = self.M * np.eye(self.d, self.n) if len(np.shape(self.M)) > 1: - mu = self.m + np.einsum("...ja,...a->...j", self.M, theta) + M = self.M else: - mu = self.m + self.M * theta + M = self.M * np.eye(self.d, self.n) + + mu = self.m + np.einsum("...ja,...a->...j", M, theta) return multivariate_normal(mu, self.C, self.shape, self.d) def prior(self): @@ -153,9 +151,9 @@ def posterior(self, D): if len(np.shape(self.M)) > 1: values = D - self.m - np.einsum("...ja,...a->...j", self.M, self.mu) - if len(self.shape(self.C)) > 1: + if len(np.shape(self.C)) > 1: MinvCM = np.einsum( - "...ja,...ab,...kb->...jk", self.M, inv(self.C), self.M + "...aj,...ab,...bk->...jk", self.M, inv(self.C), self.M ) else: MinvCM = np.einsum( @@ -182,7 +180,7 @@ def posterior(self, D): values = D * np.ones(self.d) values[: self.n] = values[: self.n] - self.m - self.M * self.mu - if len(self.shape(self.C)) > 1: + if len(np.shape(self.C)) > 1: MinvCM = ( np.atleast_1d(self.M)[..., None] * inv(self.C) @@ -261,13 +259,13 @@ def joint(self): mu = np.block([evidence.mean * np.ones(self.d), prior.mean * np.ones(self.n)]) corr = np.einsum( "...ja,...al->...jl", - np.atleast_2d(self.M) * np.eye(n, d), - np.atleast_2d(self.Sigma) * n.eye(n), + np.atleast_2d(self.M) * np.eye(self.n, self.d), + np.atleast_2d(self.Sigma) * np.eye(self.n), ) Sigma = np.block( [ - [np.atleast_2d(evidence.cov) * np.eye(d), corr], - [np.moveaxis(corr, -1, -2), np.atleast_2d(prior.cov) * np.eye(n)], + [np.atleast_2d(evidence.cov) * np.eye(self.d), corr], + [np.moveaxis(corr, -1, -2), np.atleast_2d(prior.cov) * np.eye(self.n)], ] ) return multivariate_normal(mu, Sigma, self.shape, len(mu)) diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 73e87f2..3cfe3e4 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -82,4 +82,43 @@ def test_likelihood( ) theta = np.random.randn(*theta_shape, n) dist = model.likelihood(theta) - assert dist.shape == np.broadcast_shapes(shape, theta_shape) + assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) + assert dist.dim == model.d + + def test_prior(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + model = self.random( + M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + dist = model.prior() + assert dist.shape == model.shape + assert dist.dim == model.n + + @pytest.mark.parametrize("D_shape", shapes) + def 
test_posterior( + self, D_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ): + model = self.random( + M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + D = np.random.randn(*D_shape, d) + dist = model.posterior(D) + assert dist.shape == np.broadcast_shapes(model.shape, D_shape) + assert dist.dim == model.n + + def test_evidence( + self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ): + model = self.random( + M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + dist = model.evidence() + assert dist.shape == model.shape + assert dist.dim == model.d + + def test_joint(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + model = self.random( + M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + dist = model.joint() + assert dist.shape == model.shape + assert dist.dim == model.n + model.d From c9fe98a68543f969bf93d1fac0794a97ba6a784d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 6 Jan 2024 17:00:00 +0000 Subject: [PATCH 029/117] Tests now passing --- lsbi/model_1.py | 158 ++++++++++++++---------------------------- lsbi/utils.py | 8 +++ tests/test_model_1.py | 9 ++- 3 files changed, 67 insertions(+), 108 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index 273464c..50c3b95 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -3,7 +3,7 @@ from numpy.linalg import inv, solve from lsbi.stats_1 import mixture_normal, multivariate_normal -from lsbi.utils import logdet +from lsbi.utils import logdet, matrix class LinearModel(object): @@ -123,11 +123,7 @@ def likelihood(self, theta): ---------- theta : array_like, shape (k, n) """ - if len(np.shape(self.M)) > 1: - M = self.M - else: - M = self.M * np.eye(self.d, self.n) - + M = matrix(self.M, self.d, self.n) mu = self.m + np.einsum("...ja,...a->...j", M, theta) return multivariate_normal(mu, self.C, self.shape, self.d) @@ -148,65 +144,35 @@ def posterior(self, D): ---------- D : array_like, shape (d,) """ - if len(np.shape(self.M)) > 1: - values = D - self.m - np.einsum("...ja,...a->...j", self.M, self.mu) - - if len(np.shape(self.C)) > 1: - MinvCM = np.einsum( - "...aj,...ab,...bk->...jk", self.M, inv(self.C), self.M - ) - else: - MinvCM = np.einsum( - "...ja,...kb->...jk", self.M, self.M / np.array(self.C)[:, None] - ) - - if len(np.shape(self.Sigma)) > 1: - Sigma = inv(inv(self.Sigma) + MinvCM) - else: - Sigma = inv(np.eye(self.d) / self.Sigma + MinvCM) - - if len(np.shape(self.C)) > 1: - mu = self.mu + np.einsum( - "...ja,...ba,...bc,...c->...j", Sigma, self.M, inv(self.C), values - ) - else: - mu = self.mu + np.einsum( - "...ja,...ac,...c->...j", - Sigma, - self.M / np.array(self.C)[:, None], - values, - ) + M = matrix(self.M, self.d, self.n) + + if len(np.shape(self.C)) > 1: + MinvCM = np.einsum("...aj,...ab,...bk->...jk", M, inv(self.C), M) + else: + MinvCM = np.einsum( + "...aj,...bk->...jk", M, M / np.atleast_1d(self.C)[:, None] + ) + + if len(np.shape(self.Sigma)) > 1: + Sigma = inv(inv(self.Sigma) + MinvCM) else: - values = D * np.ones(self.d) - values[: self.n] = values[: self.n] - self.m - self.M * self.mu - - if len(np.shape(self.C)) > 1: - MinvCM = ( - np.atleast_1d(self.M)[..., None] - * inv(self.C) - * np.atleast_1d(self.M)[..., None, :] - ) - if len(np.shape(self.Sigma)) > 1: - Sigma = inv(inv(self.Sigma) + MinvCM) - else: - Sigma = inv(np.eye(self.d) / self.Sigma + MinvCM) - - mu = self.mu + np.einsum( - "...ja,...ba,...bc,...c->...j", Sigma, self.M, inv(self.C), values - ) - else: - MinvCM = self.M / 
np.atleast_1d(self.C)[: self.n] * self.M - if len(np.shape(self.Sigma)) > 1: - Sigma = inv(inv(self.Sigma) + np.eye(self.n) * MinvCM) - else: - Sigma = 1 / (1 / self.Sigma + MinvCM) - - mu = self.mu + np.einsum( - "...ja,...ac,...c->...j", - Sigma, - self.M / np.atleast_1d(self.C)[: self.n], - values, - ) + Sigma = inv(np.eye(self.n) / self.Sigma + MinvCM) + + values = ( + D - self.m - np.einsum("...ja,...a->...j", M, self.mu * np.ones(self.n)) + ) + + if len(np.shape(self.C)) > 1: + mu = self.mu + np.einsum( + "...ja,...ba,...bc,...c->...j", Sigma, M, inv(self.C), values + ) + else: + mu = self.mu + np.einsum( + "...ja,...ca,...c->...j", + Sigma, + M / np.atleast_1d(self.C)[:, None], + values, + ) return multivariate_normal(mu, Sigma, self.shape, self.n) @@ -215,37 +181,16 @@ def evidence(self): D ~ N( m + M mu, C + M Sigma M' ) """ - if len(np.shape(self.M)) > 1: - mu = self.m + np.einsum("...ja,...a->...j", self.M, self.mu) - - if len(np.shape(self.Sigma)) > 1: - Sigma = np.einsum( - "...ja,...ab,...kb->...jk", self.M, self.Sigma, self.M - ) - else: - Sigma = np.einsum("...ja,...kb->...jk", self.M, self.Sigma * self.M) - if len(np.shape(self.C)) > 1: - Sigma = self.C + Sigma - else: - Sigma = self.C * np.eye(self.d) + Sigma + M = matrix(self.M, self.d, self.n) + mu = self.m + np.einsum("...ja,...a->...j", M, self.mu * np.ones(self.n)) + if len(np.shape(self.Sigma)) > 1: + Sigma = np.einsum("...ja,...ab,...kb->...jk", M, self.Sigma, M) else: - mu = self.m * np.ones(self.d) - mu[: self.n] = mu[: self.n] + self.M * self.mu - Sigma = self.C - - if len(np.shape(self.Sigma)) > 1 or len(np.shape(self.C)) > 1: - if len(np.shape(self.C)) <= 1: - Sigma = Sigma * np.eye(self.d) - Sigma[: self.n, : self.n] = ( - Sigma[: self.n, : self.n] - + np.atleast_1d(self.M)[..., None] - * self.Sigma - * np.atleast_1d(self.M)[..., None, :] - ) - else: - Sigma = Sigma * np.ones(self.d) - Sigma[: self.n] = Sigma[: self.n] + self.M * self.Sigma * self.M - + Sigma = np.einsum("...ja,...kb->...jk", M, self.Sigma * M) + if len(np.shape(self.C)) > 1: + Sigma = self.C + Sigma + else: + Sigma = self.C * np.eye(self.d) + Sigma return multivariate_normal(mu, Sigma, self.shape, self.d) def joint(self): @@ -256,19 +201,18 @@ def joint(self): """ evidence = self.evidence() prior = self.prior() - mu = np.block([evidence.mean * np.ones(self.d), prior.mean * np.ones(self.n)]) - corr = np.einsum( - "...ja,...al->...jl", - np.atleast_2d(self.M) * np.eye(self.n, self.d), - np.atleast_2d(self.Sigma) * np.eye(self.n), - ) - Sigma = np.block( - [ - [np.atleast_2d(evidence.cov) * np.eye(self.d), corr], - [np.moveaxis(corr, -1, -2), np.atleast_2d(prior.cov) * np.eye(self.n)], - ] - ) - return multivariate_normal(mu, Sigma, self.shape, len(mu)) + a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) + b = np.broadcast_to(prior.mean, self.shape + (self.n,)) + mu = np.block([a, b]) + M = matrix(self.M, self.d, self.n) + Sigma = matrix(self.Sigma, self.n) + corr = np.einsum("...ja,...al->...jl", M, Sigma) + A = np.broadcast_to(matrix(evidence.cov, self.d), self.shape + (self.d, self.d)) + D = np.broadcast_to(matrix(prior.cov, self.n), self.shape + (self.n, self.n)) + B = np.broadcast_to(corr, self.shape + (self.d, self.n)) + C = np.moveaxis(B, -1, -2) + Sigma = np.block([[A, B], [C, D]]) + return multivariate_normal(mu, Sigma, self.shape, self.n + self.d) class LinearMixtureModel(object): diff --git a/lsbi/utils.py b/lsbi/utils.py index 09ce54c..514149d 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -13,6 +13,14 @@ def quantise(f, 
x, tol=1e-8): return np.where(np.abs(y) < tol, 0, y) +def matrix(M, *args): + """Convert M to a matrix.""" + if len(np.shape(M)) > 1: + return M + else: + return M * np.eye(*args) + + def bisect(f, a, b, args=(), tol=1e-8): """Vectorised simple bisection search. diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 3cfe3e4..72313df 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -4,7 +4,6 @@ from lsbi.model_1 import LinearMixtureModel, LinearModel shapes = [(2, 3), (3,), ()] -sizes = [(6, 5), (5,), ()] dims = [1, 2, 4] @@ -116,6 +115,14 @@ def test_evidence( assert dist.dim == model.d def test_joint(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + M_shape = (3,) + m_shape = (3,) + C_shape = (3,) + mu_shape = (3,) + Sigma_shape = (3,) + shape = (3,) + n = 1 + d = 1 model = self.random( M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d ) From 6ecffaa4e3582237ec8641ba9ef006f18b69d90a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 6 Jan 2024 19:14:24 +0000 Subject: [PATCH 030/117] Reinstated more efficient calculations --- lsbi/model_1.py | 67 +++++++++++++++++++++++++------------------ tests/test_model_1.py | 8 ------ 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index 50c3b95..ccbf495 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -145,34 +145,38 @@ def posterior(self, D): D : array_like, shape (d,) """ M = matrix(self.M, self.d, self.n) - - if len(np.shape(self.C)) > 1: - MinvCM = np.einsum("...aj,...ab,...bk->...jk", M, inv(self.C), M) - else: - MinvCM = np.einsum( - "...aj,...bk->...jk", M, M / np.atleast_1d(self.C)[:, None] - ) - - if len(np.shape(self.Sigma)) > 1: - Sigma = inv(inv(self.Sigma) + MinvCM) - else: - Sigma = inv(np.eye(self.n) / self.Sigma + MinvCM) - values = ( D - self.m - np.einsum("...ja,...a->...j", M, self.mu * np.ones(self.n)) ) - if len(np.shape(self.C)) > 1: + if ( + len(np.shape(self.Sigma)) > 1 + or len(np.shape(self.C)) > 1 + or len(np.shape(self.M)) > 1 + ): + if len(np.shape(self.C)) > 1: + invC = inv(self.C) + else: + invC = np.eye(self.d) / self.C + + if len(np.shape(self.Sigma)) > 1: + invSigma = inv(self.Sigma) + else: + invSigma = np.eye(self.n) / self.Sigma + + Sigma = inv(invSigma + np.einsum("...aj,...ab,...bk->...jk", M, invC, M)) mu = self.mu + np.einsum( - "...ja,...ba,...bc,...c->...j", Sigma, M, inv(self.C), values + "...ja,...ba,...bc,...c->...j", Sigma, M, invC, values ) else: - mu = self.mu + np.einsum( - "...ja,...ca,...c->...j", - Sigma, - M / np.atleast_1d(self.C)[:, None], - values, - ) + dim = min(self.n, self.d) + C = np.atleast_1d(self.C)[:dim] + M = np.atleast_1d(self.M)[:dim] + Sigma = np.ones(self.n) * self.Sigma + Sigma[:dim] = 1 / (1 / Sigma[:dim] + M**2 / C) + + mu = np.broadcast_to(self.mu, values.shape[:-1] + (self.n,)).copy() + mu[..., :dim] = mu[..., :dim] + Sigma[:dim] * M / C * values[..., :dim] return multivariate_normal(mu, Sigma, self.shape, self.n) @@ -183,14 +187,21 @@ def evidence(self): """ M = matrix(self.M, self.d, self.n) mu = self.m + np.einsum("...ja,...a->...j", M, self.mu * np.ones(self.n)) - if len(np.shape(self.Sigma)) > 1: - Sigma = np.einsum("...ja,...ab,...kb->...jk", M, self.Sigma, M) - else: - Sigma = np.einsum("...ja,...kb->...jk", M, self.Sigma * M) - if len(np.shape(self.C)) > 1: - Sigma = self.C + Sigma + if ( + len(np.shape(self.Sigma)) > 1 + or len(np.shape(self.C)) > 1 + or len(np.shape(self.M)) > 1 + ): + Sigma = matrix(self.C, self.d) + np.einsum( + 
"...ja,...ab,...kb->...jk", M, matrix(self.Sigma, self.n), M + ) else: - Sigma = self.C * np.eye(self.d) + Sigma + dim = min(self.n, self.d) + Sigma = self.C * np.ones(self.d) + M = np.atleast_1d(self.M)[:dim] + S = np.atleast_1d(self.Sigma)[:dim] + Sigma[:dim] += S * M**2 + return multivariate_normal(mu, Sigma, self.shape, self.d) def joint(self): diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 72313df..cb5ca72 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -115,14 +115,6 @@ def test_evidence( assert dist.dim == model.d def test_joint(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): - M_shape = (3,) - m_shape = (3,) - C_shape = (3,) - mu_shape = (3,) - Sigma_shape = (3,) - shape = (3,) - n = 1 - d = 1 model = self.random( M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d ) From 1d29ff4c86766673c7dec7b958a8c1fd1dc993c8 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Mon, 8 Jan 2024 16:23:32 +0000 Subject: [PATCH 031/117] Major test suite completed --- lsbi/model_1.py | 8 +-- tests/test_model_1.py | 135 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 126 insertions(+), 17 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index ccbf495..b4da3e1 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -226,7 +226,7 @@ def joint(self): return multivariate_normal(mu, Sigma, self.shape, self.n + self.d) -class LinearMixtureModel(object): +class LinearMixtureModel(LinearModel): """A linear mixture model. D|theta, A ~ N( m + M theta, C ) @@ -310,7 +310,7 @@ def shape(self): @property def k(self): """Number of mixture components of the distribution.""" - return np.shape[-1] + return self.shape[-1] def likelihood(self, theta): """P(D|theta) as a scipy distribution object. 
@@ -323,7 +323,7 @@ def likelihood(self, theta): ---------- theta : array_like, shape (n,) """ - dist = super().likelihood(theta) + dist = super().likelihood(theta[..., None, :]) logA = self.prior().weights(theta) return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) @@ -348,7 +348,7 @@ def posterior(self, D): ---------- D : array_like, shape (d,) """ - dist = super().posterior(D) + dist = super().posterior(D[..., None, :]) logA = self.evidence().weights(D) return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) diff --git a/tests/test_model_1.py b/tests/test_model_1.py index cb5ca72..2452f1c 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -16,8 +16,6 @@ @pytest.mark.parametrize("C_shape", shapes + ["scalar", "vector"]) @pytest.mark.parametrize("Sigma_shape", shapes + ["scalar", "vector"]) class TestLinearModel(object): - cls = LinearModel - def random(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): if M_shape == "scalar": M = np.random.randn() @@ -52,19 +50,9 @@ def random(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): Sigma = np.random.randn(*Sigma_shape, n, n) Sigma = np.einsum("...ij,...kj->...ik", Sigma, Sigma) + n * np.eye(n) - model = self.cls(M, m, C, mu, Sigma, shape, n, d) - + model = LinearModel(M, m, C, mu, Sigma, shape, n, d) assert model.d == d assert model.n == n - model.prior() - assert model.shape == np.broadcast_shapes( - shape, - np.shape(np.atleast_2d(M))[:-2], - np.shape(np.atleast_1d(m))[:-1], - np.shape(np.atleast_1d(mu))[:-1], - np.shape(np.atleast_2d(C))[:-2], - np.shape(np.atleast_2d(Sigma))[:-2], - ) assert np.all(model.M == M) assert np.all(model.m == m) assert np.all(model.C == C) @@ -72,6 +60,19 @@ def random(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): assert np.all(model.Sigma == Sigma) return model + def test_init(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + model = self.random( + M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + assert model.shape == np.broadcast_shapes( + shape, + np.shape(np.atleast_2d(model.M))[:-2], + np.shape(np.atleast_1d(model.m))[:-1], + np.shape(np.atleast_1d(model.mu))[:-1], + np.shape(np.atleast_2d(model.C))[:-2], + np.shape(np.atleast_2d(model.Sigma))[:-2], + ) + @pytest.mark.parametrize("theta_shape", shapes) def test_likelihood( self, theta_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d @@ -121,3 +122,111 @@ def test_joint(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, dist = model.joint() assert dist.shape == model.shape assert dist.dim == model.n + model.d + + +@pytest.mark.parametrize("logA_shape", shapes) +class TestLinearMixtureModel(TestLinearModel): + def random( + self, logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ): + model = super().random( + M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + logA = np.random.randn(*logA_shape) + model = LinearMixtureModel( + logA, model.M, model.m, model.C, model.mu, model.Sigma, shape, n, d + ) + assert np.all(model.logA == logA) + return model + + def test_init( + self, logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ): + model = self.random( + logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + assert model.shape == np.broadcast_shapes( + shape, + np.shape(np.atleast_2d(model.M))[:-2], + np.shape(np.atleast_1d(model.m))[:-1], + np.shape(np.atleast_1d(model.mu))[:-1], + 
np.shape(np.atleast_2d(model.C))[:-2], + np.shape(np.atleast_2d(model.Sigma))[:-2], + np.shape(model.logA), + ) + + @pytest.mark.parametrize("theta_shape", shapes) + def test_likelihood( + self, + theta_shape, + M_shape, + m_shape, + C_shape, + mu_shape, + Sigma_shape, + logA_shape, + shape, + n, + d, + ): + model = self.random( + logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + theta = np.random.randn(*theta_shape[:-1], n) + dist = model.likelihood(theta) + if model.shape != (): + assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) + assert dist.dim == model.d + + def test_prior( + self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, logA_shape, shape, n, d + ): + model = self.random( + logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + dist = model.prior() + assert dist.shape == model.shape + assert dist.dim == model.n + + @pytest.mark.parametrize("D_shape", shapes) + def test_posterior( + self, + D_shape, + M_shape, + m_shape, + C_shape, + mu_shape, + Sigma_shape, + logA_shape, + shape, + n, + d, + ): + model = self.random( + logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + D = np.random.randn(*D_shape[:-1], d) + dist = model.posterior(D) + if model.shape != (): + assert dist.shape == np.broadcast_shapes(model.shape, D_shape) + assert dist.dim == model.n + + def test_evidence( + self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, logA_shape, shape, n, d + ): + model = self.random( + logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + dist = model.evidence() + assert dist.shape == model.shape + assert dist.dim == model.d + + def test_joint( + self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, logA_shape, shape, n, d + ): + model = self.random( + logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + ) + dist = model.joint() + assert dist.shape == model.shape + assert dist.dim == model.n + model.d From 634cecf8f2326d5700c719e1e025e89270c1c72b Mon Sep 17 00:00:00 2001 From: Will Handley Date: Mon, 8 Jan 2024 23:34:13 +0000 Subject: [PATCH 032/117] Test suite now too large to fit in memory. 
Need a different strategy --- lsbi/stats_1.py | 2 +- tests/test_model_1.py | 364 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 329 insertions(+), 37 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 6372c86..3004f54 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -138,7 +138,7 @@ def predict(self, A=1, b=0): if len(np.shape(A)) > 1: mean = np.einsum("...qn,...n->...q", A, self.mean) + b if self.diagonal_cov: - cov = np.einsum("...qn,...pn->...qp", A, A * self.cov) + cov = np.einsum("...qn,...pn->...qp", A, A * self.cov[..., None, :]) diagonal_cov = False else: cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 2452f1c..fd29fd9 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -12,15 +12,31 @@ @pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("m_shape", shapes + ["scalar"]) @pytest.mark.parametrize("mu_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("M_shape", shapes + ["scalar", "vector"]) -@pytest.mark.parametrize("C_shape", shapes + ["scalar", "vector"]) -@pytest.mark.parametrize("Sigma_shape", shapes + ["scalar", "vector"]) +@pytest.mark.parametrize("M_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("C_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("Sigma_shape", shapes + ["scalar"]) +@pytest.mark.parametrize("diag_Sigma", [True, False]) +@pytest.mark.parametrize("diag_C", [True, False]) +@pytest.mark.parametrize("diag_M", [True, False]) class TestLinearModel(object): - def random(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + def random( + self, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, + ): if M_shape == "scalar": M = np.random.randn() - elif M_shape == "vector": - M = np.random.randn(n) + elif diag_M: + M = np.random.randn(*M_shape, n) else: M = np.random.randn(*M_shape, d, n) @@ -31,8 +47,8 @@ def random(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): if C_shape == "scalar": C = np.random.randn() ** 2 - elif C_shape == "vector": - C = np.random.randn(d) ** 2 + elif diag_C: + C = np.random.randn(*C_shape, d) ** 2 else: C = np.random.randn(*C_shape, d, d) C = np.einsum("...ij,...kj->...ik", C, C) + d * np.eye(d) @@ -44,8 +60,8 @@ def random(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): if Sigma_shape == "scalar": Sigma = np.random.randn() ** 2 - elif Sigma_shape == "vector": - Sigma = np.random.randn(n) ** 2 + elif diag_Sigma: + Sigma = np.random.randn(*Sigma_shape, n) ** 2 else: Sigma = np.random.randn(*Sigma_shape, n, n) Sigma = np.einsum("...ij,...kj->...ik", Sigma, Sigma) + n * np.eye(n) @@ -60,9 +76,32 @@ def random(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): assert np.all(model.Sigma == Sigma) return model - def test_init(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + def test_init( + self, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, + ): model = self.random( - M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) assert model.shape == np.broadcast_shapes( shape, @@ -75,19 +114,64 @@ def test_init(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, @pytest.mark.parametrize("theta_shape", shapes) def test_likelihood( - self, 
theta_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + self, + theta_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ): model = self.random( - M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) theta = np.random.randn(*theta_shape, n) dist = model.likelihood(theta) assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) assert dist.dim == model.d - def test_prior(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + def test_prior( + self, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, + ): model = self.random( - M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) dist = model.prior() assert dist.shape == model.shape @@ -95,10 +179,32 @@ def test_prior(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, @pytest.mark.parametrize("D_shape", shapes) def test_posterior( - self, D_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + self, + D_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ): model = self.random( - M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) D = np.random.randn(*D_shape, d) dist = model.posterior(D) @@ -106,18 +212,62 @@ def test_posterior( assert dist.dim == model.n def test_evidence( - self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + self, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ): model = self.random( - M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) dist = model.evidence() assert dist.shape == model.shape assert dist.dim == model.d - def test_joint(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d): + def test_joint( + self, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, + ): model = self.random( - M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) dist = model.joint() assert dist.shape == model.shape @@ -127,10 +277,32 @@ def test_joint(self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, @pytest.mark.parametrize("logA_shape", shapes) class TestLinearMixtureModel(TestLinearModel): def random( - self, logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + self, + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ): model = super().random( - M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) logA = np.random.randn(*logA_shape) model = LinearMixtureModel( @@ -140,10 +312,33 @@ def random( return model def test_init( - self, logA_shape, M_shape, m_shape, 
C_shape, mu_shape, Sigma_shape, shape, n, d + self, + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ): model = self.random( - logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) assert model.shape == np.broadcast_shapes( shape, @@ -159,18 +354,32 @@ def test_init( def test_likelihood( self, theta_shape, + logA_shape, M_shape, + diag_M, m_shape, C_shape, + diag_C, mu_shape, Sigma_shape, - logA_shape, + diag_Sigma, shape, n, d, ): model = self.random( - logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) theta = np.random.randn(*theta_shape[:-1], n) dist = model.likelihood(theta) @@ -179,10 +388,33 @@ def test_likelihood( assert dist.dim == model.d def test_prior( - self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, logA_shape, shape, n, d + self, + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ): model = self.random( - logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) dist = model.prior() assert dist.shape == model.shape @@ -192,18 +424,32 @@ def test_prior( def test_posterior( self, D_shape, + logA_shape, M_shape, + diag_M, m_shape, C_shape, + diag_C, mu_shape, Sigma_shape, - logA_shape, + diag_Sigma, shape, n, d, ): model = self.random( - logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) D = np.random.randn(*D_shape[:-1], d) dist = model.posterior(D) @@ -212,20 +458,66 @@ def test_posterior( assert dist.dim == model.n def test_evidence( - self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, logA_shape, shape, n, d + self, + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ): model = self.random( - logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) dist = model.evidence() assert dist.shape == model.shape assert dist.dim == model.d def test_joint( - self, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, logA_shape, shape, n, d + self, + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ): model = self.random( - logA_shape, M_shape, m_shape, C_shape, mu_shape, Sigma_shape, shape, n, d + logA_shape, + M_shape, + diag_M, + m_shape, + C_shape, + diag_C, + mu_shape, + Sigma_shape, + diag_Sigma, + shape, + n, + d, ) dist = model.joint() assert dist.shape == model.shape From c05c31481352ae398e63842f54166306492add55 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 13 Jan 2024 14:49:23 +0000 Subject: [PATCH 033/117] Got stats_1 tests now running --- lsbi/stats_1.py | 20 ++++++++++++++------ tests/test_stats_1.py | 31 ++++++++++++++++--------------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/lsbi/stats_1.py 
b/lsbi/stats_1.py index 3004f54..36204c0 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -136,9 +136,11 @@ def predict(self, A=1, b=0): """ diagonal_cov = self.diagonal_cov if len(np.shape(A)) > 1: - mean = np.einsum("...qn,...n->...q", A, self.mean) + b + mean = np.einsum("...qn,...n->...q", A, np.ones(self.dim) * self.mean) + b if self.diagonal_cov: - cov = np.einsum("...qn,...pn->...qp", A, A * self.cov[..., None, :]) + cov = np.einsum( + "...qn,...pn->...qp", A, A * np.atleast_1d(self.cov)[..., None, :] + ) diagonal_cov = False else: cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) @@ -173,7 +175,7 @@ def marginalise(self, indices): mean = (np.ones(self.dim) * self.mean)[..., i] if self.diagonal_cov: - cov = (np.ones(self.dim) * self.cov)[i] + cov = (np.ones(self.dim) * self.cov)[..., i] else: cov = self.cov[..., i, :][..., i] @@ -200,7 +202,7 @@ def condition(self, indices, values): mean = (np.ones(self.dim) * self.mean)[..., i] if self.diagonal_cov: - cov = (np.ones(self.dim) * self.cov)[i] + cov = (np.ones(self.dim) * self.cov)[..., i] shape = np.broadcast_shapes(self.shape, values.shape[:-1]) else: mean = mean + np.einsum( @@ -351,7 +353,10 @@ def rvs(self, size=()): mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) x = np.random.randn(*size, *self.shape[:-1], self.dim) if self.diagonal_cov: - return mean + np.sqrt(self.cov) * x + L = np.sqrt(self.cov) + L = np.broadcast_to(L, (*self.shape, self.dim)) + L = np.choose(i[..., None], np.moveaxis(L, -2, 0)) + return mean + L * x else: L = cholesky(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) @@ -482,7 +487,10 @@ def bijector(self, x, inverse=False): np.s_[:-1], theta[..., :i] ) m = np.atleast_1d(dist.mean)[..., 0] - c = np.atleast_2d(dist.cov)[..., 0, 0] + if dist.diagonal_cov: + c = np.atleast_1d(dist.cov)[..., 0] + else: + c = np.atleast_2d(dist.cov)[..., 0, 0] A = np.exp(dist.logA - logsumexp(dist.logA, axis=-1)[..., None]) m = np.broadcast_to(m, dist.shape) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 88749f6..450633d 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -83,7 +83,7 @@ def test_predict( assert dist_2.shape == np.broadcast_shapes( dist.shape, np.shape(A)[:-2], np.shape(b)[:-1] ) - assert np.shape(dist_2.cov)[: -2 + diagonal_cov] == np.broadcast_shapes( + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[:-2] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( @@ -94,7 +94,7 @@ def test_predict( dist_2 = dist.predict(A) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, np.shape(A)[:-2]) - assert np.shape(dist_2.cov)[: -2 + diagonal_cov] == np.broadcast_shapes( + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[:-2] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( @@ -106,14 +106,14 @@ def test_predict( def test_marginalise(self, dim, shape, mean_shape, cov_shape, diagonal_cov, p): if dim < p: pytest.skip("dim < p") - i = np.random.choice(dim, p, replace=False) + indices = np.random.choice(dim, p, replace=False) dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - dist_2 = dist.marginalise(i) + dist_2 = dist.marginalise(indices) assert isinstance(dist_2, self.cls) assert dist_2.shape == dist.shape assert ( - np.shape(dist_2.cov)[: -2 + diagonal_cov] + np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] 
== np.shape(dist.cov)[: -2 + diagonal_cov] ) assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] @@ -135,10 +135,10 @@ def test_condition( assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) assert ( - np.shape(dist_2.cov)[: -2 + diagonal_cov] + np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.shape(dist.cov)[: -2 + diagonal_cov] ) - if cov_shape == "scalar" or cov_shape == "vector": + if cov_shape == "scalar" or diagonal_cov: assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] else: assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( @@ -192,7 +192,7 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): shape, logA_shape, np.shape(mean)[:-1], - np.shape(cov)[:-2], + np.shape(cov)[: -2 + diagonal_cov], ) assert np.all(dist.logA == logA) assert np.all(dist.mean == mean) @@ -214,6 +214,7 @@ def test_rvs( ): dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) x = dist.rvs(size) + x.shape assert x.shape == size + dist.shape[:-1] + (dim,) @pytest.mark.parametrize("A_shape", shapes + ["vector", "scalar"]) @@ -257,7 +258,7 @@ def test_predict( np.shape(np.atleast_2d(A))[:-2], np.shape(np.atleast_1d(b))[:-1], ) - assert np.shape(dist_2.cov)[: -3 + diagonal_cov] == np.broadcast_shapes( + assert np.shape(dist_2.cov)[: -3 + dist_2.diagonal_cov] == np.broadcast_shapes( np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(np.atleast_2d(A))[:-2] ) assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( @@ -272,7 +273,7 @@ def test_predict( assert dist_2.shape[:-1] == np.broadcast_shapes( dist.shape[:-1], np.shape(np.atleast_2d(A))[:-2] ) - assert np.shape(dist_2.cov)[: -3 + diagonal_cov] == np.broadcast_shapes( + assert np.shape(dist_2.cov)[: -3 + dist_2.diagonal_cov] == np.broadcast_shapes( np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(np.atleast_2d(A))[:-2] ) assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( @@ -286,14 +287,14 @@ def test_marginalise( ): if dim < p: pytest.skip("dim < p") - i = np.random.choice(dim, p, replace=False) + indices = np.random.choice(dim, p, replace=False) dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - dist_2 = dist.marginalise(i) + dist_2 = dist.marginalise(indices) assert isinstance(dist_2, self.cls) assert dist_2.shape == dist.shape assert ( - np.shape(dist_2.cov)[: -2 + diagonal_cov] + np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.shape(dist.cov)[: -2 + diagonal_cov] ) assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] @@ -323,10 +324,10 @@ def test_condition( assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) assert ( - np.shape(dist_2.cov)[: -2 + diagonal_cov] + np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.shape(dist.cov)[: -2 + diagonal_cov] ) - if cov_shape == "scalar" or cov_shape == "vector": + if cov_shape == "scalar" or diagonal_cov: assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] else: assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( From 17a9a64f27ed53532f158dcfbfa2a84266455be0 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 14 Jan 2024 10:32:10 +0000 Subject: [PATCH 034/117] Predict now passing with diagonal_A extension --- lsbi/stats_1.py | 35 ++++++++--------- tests/test_stats_1.py | 89 +++++++++++++++++++++++++------------------ 2 files changed, 70 insertions(+), 54 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 36204c0..483747a 
100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -115,7 +115,7 @@ def rvs(self, size=()): else: return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) - def predict(self, A=1, b=0): + def predict(self, A=1, b=0, diagonal_A=False): """Predict the mean and covariance of a linear transformation. if: x ~ N(mu, Sigma) @@ -134,17 +134,11 @@ def predict(self, A=1, b=0): ------- multivariate_normal shape (..., k) """ + if len(np.shape(A)) < 2: + diagonal_A = True diagonal_cov = self.diagonal_cov - if len(np.shape(A)) > 1: - mean = np.einsum("...qn,...n->...q", A, np.ones(self.dim) * self.mean) + b - if self.diagonal_cov: - cov = np.einsum( - "...qn,...pn->...qp", A, A * np.atleast_1d(self.cov)[..., None, :] - ) - diagonal_cov = False - else: - cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) - else: + if diagonal_A: + dim = self.dim mean = A * self.mean + b if self.diagonal_cov: cov = A * self.cov * A @@ -154,9 +148,16 @@ def predict(self, A=1, b=0): * np.atleast_1d(A)[..., None] * np.atleast_1d(A)[..., None, :] ) - dim = np.max([*np.shape(A)[-2:-1], *np.shape(b)[-1:], -1]) - if dim == -1: - dim = self.dim + else: + mean = np.einsum("...qn,...n->...q", A, np.ones(self.dim) * self.mean) + b + if self.diagonal_cov: + cov = np.einsum( + "...qn,...pn->...qp", A, A * np.atleast_1d(self.cov)[..., None, :] + ) + diagonal_cov = False + else: + cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) + dim = np.shape(A)[-2] return multivariate_normal(mean, cov, self.shape, dim, diagonal_cov) def marginalise(self, indices): @@ -363,7 +364,7 @@ def rvs(self, size=()): L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) return mean + np.einsum("...ij,...j->...i", L, x) - def predict(self, A=1, b=0): + def predict(self, A=1, b=0, diagonal_A=False): """Predict the mean and covariance of a linear transformation. 
if: x ~ mixN(mu, Sigma, logA) @@ -383,10 +384,10 @@ def predict(self, A=1, b=0): mixture_normal shape (..., k) """ if len(np.shape(A)) > 1: - A = np.expand_dims(A, axis=-3) + A = np.expand_dims(A, axis=-3 + diagonal_A) if len(np.shape(b)) > 0: b = np.expand_dims(b, axis=-2) - dist = super().predict(A, b) + dist = super().predict(A, b, diagonal_A) return mixture_normal( self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov ) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 450633d..7d9a912 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -7,12 +7,17 @@ sizes = [(6, 5), (5,), ()] dims = [1, 2, 4] +tests = [] -@pytest.mark.parametrize("dim", dims) -@pytest.mark.parametrize("shape", shapes) -@pytest.mark.parametrize("mean_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("cov_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("diagonal_cov", [True, False]) +for dim in dims: + for shape in shapes: + for mean_shape in shapes + ["scalar"]: + for cov_shape in shapes + ["scalar"]: + for diagonal_cov in [True, False]: + tests.append((dim, shape, mean_shape, cov_shape, diagonal_cov)) + + +@pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) class TestMultivariateNormal(object): cls = multivariate_normal @@ -53,15 +58,23 @@ def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): x = dist.rvs(size) assert x.shape == size + dist.shape + (dim,) - @pytest.mark.parametrize("A_shape", shapes + ["vector", "scalar"]) + @pytest.mark.parametrize("A_shape", shapes + ["scalar"]) + @pytest.mark.parametrize("diagonal_A", [True, False]) @pytest.mark.parametrize("b_shape", shapes + ["scalar"]) @pytest.mark.parametrize("k", dims) def test_predict( - self, dim, shape, mean_shape, cov_shape, diagonal_cov, k, A_shape, b_shape + self, + dim, + shape, + mean_shape, + cov_shape, + diagonal_cov, + k, + A_shape, + diagonal_A, + b_shape, ): - if (A_shape == "vector" or A_shape == "scalar") and ( - b_shape != "scalar" or k != dim - ): + if (diagonal_A or A_shape == "scalar") and (b_shape != "scalar" or k != dim): pytest.skip("Non broadcastable A and b") dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) @@ -73,32 +86,34 @@ def test_predict( if A_shape == "scalar": A = np.random.randn() - elif A_shape == "vector": - A = np.random.randn(dim) + elif diagonal_A: + A = np.random.randn(*A_shape, dim) else: A = np.random.randn(*A_shape, k, dim) - dist_2 = dist.predict(A, b) + dist_2 = dist.predict(A, b, diagonal_A) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes( - dist.shape, np.shape(A)[:-2], np.shape(b)[:-1] + dist.shape, np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1] ) assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[:-2] + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(A)[:-2], np.shape(b)[:-1] + np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1] ) assert dist_2.dim == k - dist_2 = dist.predict(A) + dist_2 = dist.predict(A, diagonal_A=diagonal_A) assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes(dist.shape, np.shape(A)[:-2]) + assert dist_2.shape == np.broadcast_shapes( + dist.shape, np.shape(A)[: -2 + diagonal_A] + ) assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - 
np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[:-2] + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(A)[:-2] + np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A] ) assert dist_2.dim == k @@ -217,7 +232,8 @@ def test_rvs( x.shape assert x.shape == size + dist.shape[:-1] + (dim,) - @pytest.mark.parametrize("A_shape", shapes + ["vector", "scalar"]) + @pytest.mark.parametrize("A_shape", shapes + ["scalar"]) + @pytest.mark.parametrize("diagonal_A", [True, False]) @pytest.mark.parametrize("b_shape", shapes + ["scalar"]) @pytest.mark.parametrize("k", dims) def test_predict( @@ -228,13 +244,12 @@ def test_predict( mean_shape, cov_shape, diagonal_cov, - k, A_shape, + diagonal_A, b_shape, + k, ): - if (A_shape == "vector" or A_shape == "scalar") and ( - b_shape != "scalar" or k != dim - ): + if (diagonal_A or A_shape == "scalar") and (b_shape != "scalar" or k != dim): pytest.skip("Non broadcastable A and b") dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) @@ -246,38 +261,38 @@ def test_predict( if A_shape == "scalar": A = np.random.randn() - elif A_shape == "vector": - A = np.random.randn(dim) + elif diagonal_A: + A = np.random.randn(*A_shape[:-1], dim) else: A = np.random.randn(*A_shape[:-1], k, dim) - dist_2 = dist.predict(A, b) + dist_2 = dist.predict(A, b, diagonal_A) assert isinstance(dist_2, self.cls) assert dist_2.shape[:-1] == np.broadcast_shapes( dist.shape[:-1], - np.shape(np.atleast_2d(A))[:-2], - np.shape(np.atleast_1d(b))[:-1], + np.shape(A)[: -2 + diagonal_A], + np.shape(b)[:-1], ) assert np.shape(dist_2.cov)[: -3 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(np.atleast_2d(A))[:-2] + np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( np.shape(dist.mean)[:-2], - np.shape(np.atleast_2d(A))[:-2], - np.shape(np.atleast_1d(b))[:-1], + np.shape(A)[: -2 + diagonal_A], + np.shape(b)[:-1], ) assert dist_2.dim == k - dist_2 = dist.predict(A) + dist_2 = dist.predict(A, diagonal_A=diagonal_A) assert isinstance(dist_2, self.cls) assert dist_2.shape[:-1] == np.broadcast_shapes( - dist.shape[:-1], np.shape(np.atleast_2d(A))[:-2] + dist.shape[:-1], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.cov)[: -3 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(np.atleast_2d(A))[:-2] + np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( - np.shape(dist.mean)[:-2], np.shape(np.atleast_2d(A))[:-2] + np.shape(dist.mean)[:-2], np.shape(A)[: -2 + diagonal_A] ) assert dist_2.dim == k From 7fc035b99f5bd8e9220cd00aaaa86899c65e6408 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 14 Jan 2024 10:51:24 +0000 Subject: [PATCH 035/117] mixture predict now allows changing the mixture actively --- lsbi/stats_1.py | 4 ---- tests/test_stats_1.py | 30 +++++++++++++++--------------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 483747a..05798a7 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -383,10 +383,6 @@ def predict(self, A=1, b=0, diagonal_A=False): ------- mixture_normal shape (..., k) """ - if len(np.shape(A)) > 1: - A = np.expand_dims(A, axis=-3 + diagonal_A) - if len(np.shape(b)) > 0: - b = np.expand_dims(b, axis=-2) 
dist = super().predict(A, b, diagonal_A) return mixture_normal( self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 7d9a912..1a97fed 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -257,27 +257,27 @@ def test_predict( if b_shape == "scalar": b = np.random.randn() else: - b = np.random.randn(*b_shape[:-1], k) + b = np.random.randn(*b_shape, k) if A_shape == "scalar": A = np.random.randn() elif diagonal_A: - A = np.random.randn(*A_shape[:-1], dim) + A = np.random.randn(*A_shape, dim) else: - A = np.random.randn(*A_shape[:-1], k, dim) + A = np.random.randn(*A_shape, k, dim) dist_2 = dist.predict(A, b, diagonal_A) assert isinstance(dist_2, self.cls) - assert dist_2.shape[:-1] == np.broadcast_shapes( - dist.shape[:-1], + assert dist_2.shape == np.broadcast_shapes( + dist.shape, np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1], ) - assert np.shape(dist_2.cov)[: -3 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] ) - assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( - np.shape(dist.mean)[:-2], + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1], ) @@ -285,14 +285,14 @@ def test_predict( dist_2 = dist.predict(A, diagonal_A=diagonal_A) assert isinstance(dist_2, self.cls) - assert dist_2.shape[:-1] == np.broadcast_shapes( - dist.shape[:-1], np.shape(A)[: -2 + diagonal_A] + assert dist_2.shape == np.broadcast_shapes( + dist.shape, np.shape(A)[: -2 + diagonal_A] ) - assert np.shape(dist_2.cov)[: -3 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -3 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] ) - assert np.shape(dist_2.mean)[:-2] == np.broadcast_shapes( - np.shape(dist.mean)[:-2], np.shape(A)[: -2 + diagonal_A] + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A] ) assert dist_2.dim == k From d2b32f37085013bf05e37bdb12a2bdbcf42a4bf9 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 14 Jan 2024 11:12:31 +0000 Subject: [PATCH 036/117] Refactored stats_1 --- lsbi/stats_1.py | 104 +++++++++++++++--------------------------------- 1 file changed, 32 insertions(+), 72 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 05798a7..2be9ce3 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -132,33 +132,34 @@ def predict(self, A=1, b=0, diagonal_A=False): Returns ------- - multivariate_normal shape (..., k) + transformed distribution shape (..., k) """ if len(np.shape(A)) < 2: diagonal_A = True - diagonal_cov = self.diagonal_cov + dist = deepcopy(self) if diagonal_A: - dim = self.dim - mean = A * self.mean + b + dist.mean = A * self.mean + b if self.diagonal_cov: - cov = A * self.cov * A + dist.cov = A * self.cov * A else: - cov = ( + dist.cov = ( self.cov * np.atleast_1d(A)[..., None] * np.atleast_1d(A)[..., None, :] ) else: - mean = np.einsum("...qn,...n->...q", A, np.ones(self.dim) * self.mean) + b + dist.mean = ( + np.einsum("...qn,...n->...q", A, np.ones(self.dim) * self.mean) + b + ) if self.diagonal_cov: - cov = np.einsum( + 
dist.cov = np.einsum( "...qn,...pn->...qp", A, A * np.atleast_1d(self.cov)[..., None, :] ) - diagonal_cov = False + dist.diagonal_cov = False else: - cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) - dim = np.shape(A)[-2] - return multivariate_normal(mean, cov, self.shape, dim, diagonal_cov) + dist.cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) + dist._dim = np.shape(A)[-2] + return dist def marginalise(self, indices): """Marginalise over indices. @@ -170,17 +171,19 @@ def marginalise(self, indices): Returns ------- - multivariate_normal shape (*shape, dim - len(indices)) + marginalised distribution, shape (*shape, dim - len(indices)) """ + dist = deepcopy(self) i = self._bar(indices) - mean = (np.ones(self.dim) * self.mean)[..., i] + dist.mean = (np.ones(self.dim) * self.mean)[..., i] if self.diagonal_cov: - cov = (np.ones(self.dim) * self.cov)[..., i] + dist.cov = (np.ones(self.dim) * self.cov)[..., i] else: - cov = self.cov[..., i, :][..., i] + dist.cov = self.cov[..., i, :][..., i] - return multivariate_normal(mean, cov, self.shape, sum(i), self.diagonal_cov) + dist._dim = sum(i) + return dist def condition(self, indices, values): """Condition on indices with values. @@ -196,31 +199,31 @@ def condition(self, indices, values): Returns ------- - multivariate_normal shape (..., len(indices)) + conditioned distribution shape (..., len(indices)) """ i = self._bar(indices) k = indices - mean = (np.ones(self.dim) * self.mean)[..., i] + dist = deepcopy(self) + dist.mean = (np.ones(self.dim) * self.mean)[..., i] if self.diagonal_cov: - cov = (np.ones(self.dim) * self.cov)[..., i] - shape = np.broadcast_shapes(self.shape, values.shape[:-1]) + dist.cov = (np.ones(self.dim) * self.cov)[..., i] + dist._shape = np.broadcast_shapes(self.shape, values.shape[:-1]) else: - mean = mean + np.einsum( + dist.mean = dist.mean + np.einsum( "...ja,...ab,...b->...j", self.cov[..., i, :][..., :, k], inv(self.cov[..., k, :][..., :, k]), values - (np.ones(self.dim) * self.mean)[..., k], ) - cov = self.cov[..., i, :][..., :, i] - np.einsum( + dist.cov = self.cov[..., i, :][..., :, i] - np.einsum( "...ja,...ab,...bk->...jk", self.cov[..., i, :][..., :, k], inv(self.cov[..., k, :][..., :, k]), self.cov[..., k, :][..., :, i], ) - shape = self.shape - - return multivariate_normal(mean, cov, shape, sum(i), self.diagonal_cov) + dist._dim = sum(i) + return dist def _bar(self, indices): """Return the indices not in the given indices.""" @@ -364,47 +367,6 @@ def rvs(self, size=()): L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) return mean + np.einsum("...ij,...j->...i", L, x) - def predict(self, A=1, b=0, diagonal_A=False): - """Predict the mean and covariance of a linear transformation. - - if: x ~ mixN(mu, Sigma, logA) - then: Ax + b ~ mixN(A mu + b, A Sigma A^T, logA) - - Parameters - ---------- - A : array_like, shape (..., k, dim) - Linear transformation matrix. - b : array_like, shape (..., k), optional - Linear transformation vector. - - where self.shape[:-1] is broadcastable to ... - - Returns - ------- - mixture_normal shape (..., k) - """ - dist = super().predict(A, b, diagonal_A) - return mixture_normal( - self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov - ) - - def marginalise(self, indices): - """Marginalise over indices. - - Parameters - ---------- - indices : array_like - Indices to marginalise. 
- - Returns - ------- - mixture_normal shape (*shape, dim - len(indices)) - """ - dist = super().marginalise(indices) - return mixture_normal( - self.logA, dist.mean, dist.cov, self.shape, dist.dim, dist.diagonal_cov - ) - def condition(self, indices, values): """Condition on indices with values. @@ -419,13 +381,11 @@ def condition(self, indices, values): Returns ------- - mixture_normal shape (*shape, len(indices)) + conditioned distribution, shape (*shape, len(indices)) """ dist = super().condition(indices, values[..., None, :]) - logA = self.marginalise(self._bar(indices)).weights(values) - return mixture_normal( - logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov - ) + dist.logA = self.marginalise(self._bar(indices)).weights(values) + return dist def weights(self, values): """Compute the conditional weights of the mixture. From cbdbbb99ab2407971140c79a7315c6384c7dc250 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 14 Jan 2024 12:54:46 +0000 Subject: [PATCH 037/117] stats_1 no longer needs skips --- lsbi/stats_1.py | 4 +- tests/test_stats_1.py | 123 +++++++++++++++++++++--------------------- 2 files changed, 62 insertions(+), 65 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 2be9ce3..aa95c1b 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -302,9 +302,7 @@ class mixture_normal(multivariate_normal): def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): self.logA = logA - super().__init__( - mean=mean, cov=cov, shape=shape, dim=dim, diagonal_cov=diagonal_cov - ) + super().__init__(mean, cov, shape, dim, diagonal_cov) @property def shape(self): diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 1a97fed..5892027 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -8,6 +8,8 @@ dims = [1, 2, 4] tests = [] +A_tests = [] +p_tests = [] for dim in dims: for shape in shapes: @@ -15,9 +17,36 @@ for cov_shape in shapes + ["scalar"]: for diagonal_cov in [True, False]: tests.append((dim, shape, mean_shape, cov_shape, diagonal_cov)) + for A_shape in shapes + ["scalar"]: + for diagonal_A in [True, False]: + for b_shape in shapes + ["scalar"]: + for k in dims: + if (diagonal_A or A_shape == "scalar") and ( + b_shape != "scalar" or k != dim + ): + continue + A_tests.append( + ( + dim, + shape, + mean_shape, + cov_shape, + diagonal_cov, + A_shape, + diagonal_A, + b_shape, + k, + ) + ) + + for p in dims: + if dim < p: + continue + p_tests.append( + (dim, shape, mean_shape, cov_shape, diagonal_cov, p) + ) -@pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) class TestMultivariateNormal(object): cls = multivariate_normal @@ -35,17 +64,15 @@ def random(self, dim, shape, mean_shape, cov_shape, diagonal_cov): cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - dist = self.cls(mean, cov, shape, dim, diagonal_cov) + dist = multivariate_normal(mean, cov, shape, dim, diagonal_cov) assert dist.dim == dim - assert dist.shape == np.broadcast_shapes( - shape, np.shape(mean)[:-1], np.shape(cov)[: -2 + diagonal_cov] - ) assert np.all(dist.mean == mean) assert np.all(dist.cov == cov) return dist @pytest.mark.parametrize("size", sizes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) x = np.random.randn(*size, dim) @@ -53,15 +80,16 @@ def 
test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): assert logpdf.shape == size + dist.shape @pytest.mark.parametrize("size", sizes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) x = dist.rvs(size) assert x.shape == size + dist.shape + (dim,) - @pytest.mark.parametrize("A_shape", shapes + ["scalar"]) - @pytest.mark.parametrize("diagonal_A", [True, False]) - @pytest.mark.parametrize("b_shape", shapes + ["scalar"]) - @pytest.mark.parametrize("k", dims) + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", + A_tests, + ) def test_predict( self, dim, @@ -74,9 +102,6 @@ def test_predict( diagonal_A, b_shape, ): - if (diagonal_A or A_shape == "scalar") and (b_shape != "scalar" or k != dim): - pytest.skip("Non broadcastable A and b") - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) if b_shape == "scalar": @@ -117,10 +142,10 @@ def test_predict( ) assert dist_2.dim == k - @pytest.mark.parametrize("p", dims) + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests + ) def test_marginalise(self, dim, shape, mean_shape, cov_shape, diagonal_cov, p): - if dim < p: - pytest.skip("dim < p") indices = np.random.choice(dim, p, replace=False) dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) dist_2 = dist.marginalise(indices) @@ -135,13 +160,12 @@ def test_marginalise(self, dim, shape, mean_shape, cov_shape, diagonal_cov, p): assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) - @pytest.mark.parametrize("p", dims) + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests + ) def test_condition( self, dim, shape, mean_shape, cov_shape, diagonal_cov, p, values_shape ): - if dim < p: - pytest.skip("dim < p") - indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape, p) dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) @@ -164,6 +188,7 @@ def test_condition( assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape): dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) x = np.random.rand(*x_shape, dim) @@ -176,45 +201,21 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) -@pytest.mark.parametrize("dim", dims) -@pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("logA_shape", shapes) -@pytest.mark.parametrize("mean_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("cov_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("diagonal_cov", [True, False]) -class TestMixtureNormal(object): +class TestMixtureNormal(TestMultivariateNormal): cls = mixture_normal def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): + dist = super().random(dim, shape, mean_shape, cov_shape, diagonal_cov) logA = np.random.randn(*logA_shape) - if mean_shape == "scalar": - mean = np.random.randn() - else: - mean = np.random.randn(*mean_shape, dim) - - if cov_shape == "scalar": - cov = np.random.randn() ** 2 - elif diagonal_cov: - cov = np.random.randn(*cov_shape, 
dim) ** 2 - else: - cov = np.random.randn(*cov_shape, dim, dim) - cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - - dist = self.cls(logA, mean, cov, shape, dim, diagonal_cov) - - assert dist.dim == dim - assert dist.shape == np.broadcast_shapes( - shape, - logA_shape, - np.shape(mean)[:-1], - np.shape(cov)[: -2 + diagonal_cov], + dist = mixture_normal( + logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov ) assert np.all(dist.logA == logA) - assert np.all(dist.mean == mean) - assert np.all(dist.cov == cov) return dist @pytest.mark.parametrize("size", sizes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_logpdf( self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size ): @@ -224,18 +225,18 @@ def test_logpdf( assert logpdf.shape == size + dist.shape[:-1] @pytest.mark.parametrize("size", sizes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_rvs( self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size ): dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) x = dist.rvs(size) - x.shape assert x.shape == size + dist.shape[:-1] + (dim,) - @pytest.mark.parametrize("A_shape", shapes + ["scalar"]) - @pytest.mark.parametrize("diagonal_A", [True, False]) - @pytest.mark.parametrize("b_shape", shapes + ["scalar"]) - @pytest.mark.parametrize("k", dims) + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", + A_tests, + ) def test_predict( self, dim, @@ -249,9 +250,6 @@ def test_predict( b_shape, k, ): - if (diagonal_A or A_shape == "scalar") and (b_shape != "scalar" or k != dim): - pytest.skip("Non broadcastable A and b") - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) if b_shape == "scalar": @@ -296,12 +294,12 @@ def test_predict( ) assert dist_2.dim == k - @pytest.mark.parametrize("p", dims) + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests + ) def test_marginalise( self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, p ): - if dim < p: - pytest.skip("dim < p") indices = np.random.choice(dim, p, replace=False) dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) dist_2 = dist.marginalise(indices) @@ -317,7 +315,9 @@ def test_marginalise( assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) - @pytest.mark.parametrize("p", dims) + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests + ) def test_condition( self, dim, @@ -329,8 +329,6 @@ def test_condition( p, values_shape, ): - if dim < p: - pytest.skip("dim < p") indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape[:-1], p) dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) @@ -354,6 +352,7 @@ def test_condition( assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_bijector( self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, x_shape ): From c1230ddb774857d61922f3dbe9f8479b563808ae Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 14 Jan 2024 15:26:54 +0000 Subject: [PATCH 038/117] LinearModel now tested with simple tests --- lsbi/model_1.py | 190 ++++++++---- lsbi/stats_1.py | 7 +- tests/test_model_1.py | 662 
+++++++++++++++++++++--------------------- 3 files changed, 457 insertions(+), 402 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index b4da3e1..ebf051d 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -57,12 +57,34 @@ class LinearModel(object): Number of mixture components, defaults to automatically inferred value """ - def __init__(self, M=1, m=0, C=1, mu=0, Sigma=1, shape=(), n=1, d=1): + def __init__( + self, + M=1, + m=0, + C=1, + mu=0, + Sigma=1, + shape=(), + n=1, + d=1, + diagonal_M=False, + diagonal_C=False, + diagonal_Sigma=False, + ): self.M = M + self.diagonal_M = diagonal_M + if len(np.shape(self.M)) < 2: + self.diagonal_M = True self.m = m self.C = C + self.diagonal_C = diagonal_C + if len(np.shape(self.C)) < 2: + self.diagonal_C = True self.mu = mu self.Sigma = Sigma + self.diagonal_Sigma = diagonal_Sigma + if len(np.shape(self.Sigma)) < 2: + self.diagonal_Sigma = True self._shape = shape self._n = n self._d = d @@ -71,11 +93,11 @@ def __init__(self, M=1, m=0, C=1, mu=0, Sigma=1, shape=(), n=1, d=1): def shape(self): """Shape of the distribution.""" return np.broadcast_shapes( - np.atleast_2d(self.M).shape[:-2], - np.atleast_1d(self.m).shape[:-1], - np.atleast_2d(self.C).shape[:-2], - np.atleast_1d(self.mu).shape[:-1], - np.atleast_2d(self.Sigma).shape[:-2], + np.shape(self.M)[: -2 + self.diagonal_M], + np.shape(self.m)[:-1], + np.shape(self.C)[: -2 + self.diagonal_C], + np.shape(self.mu)[:-1], + np.shape(self.Sigma)[: -2 + self.diagonal_Sigma], self._shape, ) @@ -84,8 +106,8 @@ def n(self): """Dimension of the distribution.""" return np.max( [ - *np.shape(self.M)[-1:], - *np.shape(self.Sigma)[-2:], + *np.shape(self.M)[len(np.shape(self.M)) - 1 + self.diagonal_M :], + *np.shape(self.Sigma)[-2 + self.diagonal_Sigma :], *np.shape(self.mu)[-1:], self._n, ] @@ -96,8 +118,8 @@ def d(self): """Dimensionality of data space len(D).""" return np.max( [ - *np.shape(self.M)[-2:-1], - *np.shape(self.C)[-2:], + *np.shape(self.M)[-2 + self.diagonal_M : -1], + *np.shape(self.C)[-2 + self.diagonal_C :], *np.shape(self.m)[-1:], self._d, ] @@ -123,16 +145,17 @@ def likelihood(self, theta): ---------- theta : array_like, shape (k, n) """ - M = matrix(self.M, self.d, self.n) - mu = self.m + np.einsum("...ja,...a->...j", M, theta) - return multivariate_normal(mu, self.C, self.shape, self.d) + mu = self.m + np.einsum("...ja,...a->...j", self._M, theta) + return multivariate_normal(mu, self.C, self.shape, self.d, self.diagonal_C) def prior(self): """P(theta) as a scipy distribution object. theta ~ N( mu, Sigma ) """ - return multivariate_normal(self.mu, self.Sigma, self.shape, self.n) + return multivariate_normal( + self.mu, self.Sigma, self.shape, self.n, self.diagonal_Sigma + ) def posterior(self, D): """P(theta|D) as a scipy distribution object. 
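# Illustrative usage sketch (assumed from the API introduced in this patch, not part of the
# diff itself): a LinearModel with diagonal noise and prior covariances, broadcast over a
# batch of three transformation matrices.
import numpy as np
from lsbi.model_1 import LinearModel

model = LinearModel(
    M=np.random.randn(3, 2, 4),  # batch of (d, n) matrices
    m=np.zeros(2),
    C=np.ones(2),                # diagonal data covariance
    mu=np.zeros(4),
    Sigma=np.ones(4),            # diagonal prior covariance
    diagonal_C=True,
    diagonal_Sigma=True,
)
theta = np.random.randn(4)
like = model.likelihood(theta)   # vectorised multivariate_normal over D
assert like.shape == (3,) and like.dim == 2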
@@ -144,65 +167,63 @@ def posterior(self, D): ---------- D : array_like, shape (d,) """ - M = matrix(self.M, self.d, self.n) values = ( - D - self.m - np.einsum("...ja,...a->...j", M, self.mu * np.ones(self.n)) + D + - self.m + - np.einsum("...ja,...a->...j", self._M, self.mu * np.ones(self.n)) ) - if ( - len(np.shape(self.Sigma)) > 1 - or len(np.shape(self.C)) > 1 - or len(np.shape(self.M)) > 1 - ): - if len(np.shape(self.C)) > 1: - invC = inv(self.C) + diagonal_Sigma = self.diagonal_C and self.diagonal_Sigma and self.diagonal_M + + if diagonal_Sigma: + dim = min(self.n, self.d) + C = np.atleast_1d(self.C)[..., :dim] + M = np.atleast_1d(self.M)[..., :dim] + Sigma = np.ones(self.n) * self.Sigma + Sigma[..., :dim] = 1 / (1 / Sigma[..., :dim] + M**2 / C) + + mu = np.broadcast_to(self.mu, values.shape[:-1] + (self.n,)).copy() + mu[..., :dim] = mu[..., :dim] + Sigma[..., :dim] * M / C * values[..., :dim] + else: + if self.diagonal_C: + invC = np.eye(self.d) / self.C[..., None, :] else: - invC = np.eye(self.d) / self.C + invC = inv(self.C) - if len(np.shape(self.Sigma)) > 1: - invSigma = inv(self.Sigma) + if self.diagonal_Sigma: + invSigma = np.eye(self.n) / self.Sigma[..., None, :] else: - invSigma = np.eye(self.n) / self.Sigma + invSigma = inv(self.Sigma) - Sigma = inv(invSigma + np.einsum("...aj,...ab,...bk->...jk", M, invC, M)) + Sigma = inv( + invSigma + np.einsum("...aj,...ab,...bk->...jk", self._M, invC, self._M) + ) mu = self.mu + np.einsum( - "...ja,...ba,...bc,...c->...j", Sigma, M, invC, values + "...ja,...ba,...bc,...c->...j", Sigma, self._M, invC, values ) - else: - dim = min(self.n, self.d) - C = np.atleast_1d(self.C)[:dim] - M = np.atleast_1d(self.M)[:dim] - Sigma = np.ones(self.n) * self.Sigma - Sigma[:dim] = 1 / (1 / Sigma[:dim] + M**2 / C) - mu = np.broadcast_to(self.mu, values.shape[:-1] + (self.n,)).copy() - mu[..., :dim] = mu[..., :dim] + Sigma[:dim] * M / C * values[..., :dim] - - return multivariate_normal(mu, Sigma, self.shape, self.n) + return multivariate_normal(mu, Sigma, self.shape, self.n, diagonal_Sigma) def evidence(self): """P(D) as a scipy distribution object. D ~ N( m + M mu, C + M Sigma M' ) """ - M = matrix(self.M, self.d, self.n) - mu = self.m + np.einsum("...ja,...a->...j", M, self.mu * np.ones(self.n)) - if ( - len(np.shape(self.Sigma)) > 1 - or len(np.shape(self.C)) > 1 - or len(np.shape(self.M)) > 1 - ): - Sigma = matrix(self.C, self.d) + np.einsum( - "...ja,...ab,...kb->...jk", M, matrix(self.Sigma, self.n), M - ) - else: + mu = self.m + np.einsum("...ja,...a->...j", self._M, self.mu * np.ones(self.n)) + diagonal_Sigma = self.diagonal_C and self.diagonal_Sigma and self.diagonal_M + + if diagonal_Sigma: dim = min(self.n, self.d) Sigma = self.C * np.ones(self.d) - M = np.atleast_1d(self.M)[:dim] - S = np.atleast_1d(self.Sigma)[:dim] - Sigma[:dim] += S * M**2 + M = np.atleast_1d(self.M)[..., :dim] + S = np.atleast_1d(self.Sigma)[..., :dim] + Sigma[..., :dim] += S * M**2 + else: + Sigma = self._C + np.einsum( + "...ja,...ab,...kb->...jk", self._M, self._Sigma, self._M + ) - return multivariate_normal(mu, Sigma, self.shape, self.d) + return multivariate_normal(mu, Sigma, self.shape, self.d, diagonal_Sigma) def joint(self): """P(D, theta) as a scipy distribution object. 
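# When M, C and Sigma are all scalar or diagonal, the posterior branch above reduces to the
# one-dimensional conjugate-Gaussian update applied per dimension. A rough numerical
# illustration of that reduction (assumed usage, not taken from the patch):
import numpy as np
from lsbi.model_1 import LinearModel

m, M, C, mu, Sigma = 0.5, 2.0, 0.1, -1.0, 4.0    # scalars, so every block is diagonal
model = LinearModel(M=M, m=m, C=C, mu=mu, Sigma=Sigma, n=1, d=1)
D = np.array([1.3])
post = model.posterior(D)

Sigma_post = 1 / (1 / Sigma + M**2 / C)          # 1 / (1/Sigma + M^2/C)
mu_post = mu + Sigma_post * M / C * (D - m - M * mu)
# post.cov should agree with Sigma_post and post.mean with mu_post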
@@ -215,16 +236,43 @@ def joint(self): a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) b = np.broadcast_to(prior.mean, self.shape + (self.n,)) mu = np.block([a, b]) - M = matrix(self.M, self.d, self.n) - Sigma = matrix(self.Sigma, self.n) - corr = np.einsum("...ja,...al->...jl", M, Sigma) - A = np.broadcast_to(matrix(evidence.cov, self.d), self.shape + (self.d, self.d)) - D = np.broadcast_to(matrix(prior.cov, self.n), self.shape + (self.n, self.n)) - B = np.broadcast_to(corr, self.shape + (self.d, self.n)) + if evidence.diagonal_cov: + A = evidence.cov[..., None, :] * np.eye(self.d) + else: + A = evidence.cov + if prior.diagonal_cov: + D = prior.cov[..., None, :] * np.eye(self.n) + else: + D = prior.cov + B = np.einsum("...ja,...al->...jl", self._M, self._Sigma) + A = np.broadcast_to(A, self.shape + (self.d, self.d)) + D = np.broadcast_to(D, self.shape + (self.n, self.n)) + B = np.broadcast_to(B, self.shape + (self.d, self.n)) C = np.moveaxis(B, -1, -2) Sigma = np.block([[A, B], [C, D]]) return multivariate_normal(mu, Sigma, self.shape, self.n + self.d) + @property + def _M(self): + if self.diagonal_M: + return self.M[..., None, :] * np.eye(self.d, self.n) + else: + return self.M + + @property + def _C(self): + if self.diagonal_C: + return self.C[..., None, :] * np.eye(self.d) + else: + return self.C + + @property + def _Sigma(self): + if self.diagonal_Sigma: + return self.Sigma[..., None, :] * np.eye(self.n) + else: + return self.Sigma + class LinearMixtureModel(LinearModel): """A linear mixture model. @@ -276,13 +324,27 @@ class LinearMixtureModel(LinearModel): Number of parameters, defaults to automatically inferred value d : int, optional Number of data dimensions, defaults to automatically inferred value - k : int, optional - Number of mixture components, defaults to automatically inferred value """ - def __init__(self, logA=1, M=1, m=0, C=1, mu=0, Sigma=1, shape=(), n=1, d=1, k=1): + def __init__( + self, + logA=1, + M=1, + m=0, + C=1, + mu=0, + Sigma=1, + shape=(), + n=1, + d=1, + diagonal_M=False, + diagonal_C=False, + diagonal_Sigma=False, + ): self.logA = logA - super().__init__(M=M, m=m, C=C, mu=mu, Sigma=Sigma, shape=shape, n=n, d=d) + super().__init__( + M, m, C, mu, Sigma, shape, n, d, diagonal_M, diagonal_C, diagonal_Sigma + ) @classmethod def from_joint(cls, means, covs, logA, n): diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index aa95c1b..f817b7d 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -307,12 +307,7 @@ def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): @property def shape(self): """Shape of the distribution.""" - return np.broadcast_shapes( - np.shape(self.logA), - np.shape(self.mean)[:-1], - np.shape(self.cov)[: -2 + self.diagonal_cov], - self._shape, - ) + return np.broadcast_shapes(np.shape(self.logA), super().shape) def logpdf(self, x): """Log of the probability density function. 
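# The _M, _C and _Sigma properties above promote scalar or diagonal storage to an explicit
# matrix only where a dense block is unavoidable (the off-diagonal term of joint). A small
# standalone sketch of that expansion pattern (assumed equivalent; a later patch in this
# series adds the np.atleast_1d guard used here so that scalars also work):
import numpy as np

def expand_diagonal(x, *args):
    """Scalar or diagonal parameter -> dense matrix, mirroring the diagonal branch."""
    return np.atleast_1d(x)[..., None, :] * np.eye(*args)

print(expand_diagonal(2.0, 3))             # 2 * identity, shape (3, 3)
print(expand_diagonal(np.ones(2), 3, 2))   # rectangular "diagonal", shape (3, 2)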
diff --git a/tests/test_model_1.py b/tests/test_model_1.py index fd29fd9..0caf448 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -6,36 +6,68 @@ shapes = [(2, 3), (3,), ()] dims = [1, 2, 4] +tests = [] +for d in dims: + for n in dims: + for shape in shapes: + m_shape = shape + M_shape = shape + mu_shape = shape + C_shape = shape + Sigma_shape = shape + for diagonal_Sigma in [True, False]: + for diagonal_C in [True, False]: + for diagonal_M in [True, False]: + tests.append( + ( + d, + n, + shape, + m_shape, + M_shape, + mu_shape, + C_shape, + Sigma_shape, + diagonal_Sigma, + diagonal_C, + diagonal_M, + ) + ) -@pytest.mark.parametrize("d", dims) -@pytest.mark.parametrize("n", dims) -@pytest.mark.parametrize("shape", shapes) -@pytest.mark.parametrize("m_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("mu_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("M_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("C_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("Sigma_shape", shapes + ["scalar"]) -@pytest.mark.parametrize("diag_Sigma", [True, False]) -@pytest.mark.parametrize("diag_C", [True, False]) -@pytest.mark.parametrize("diag_M", [True, False]) + +# @pytest.mark.parametrize("d", dims) +# @pytest.mark.parametrize("n", dims) +# @pytest.mark.parametrize("shape", shapes) +# @pytest.mark.parametrize("m_shape", shapes + ["scalar"]) +# @pytest.mark.parametrize("mu_shape", shapes + ["scalar"]) +# @pytest.mark.parametrize("M_shape", shapes + ["scalar"]) +# @pytest.mark.parametrize("C_shape", shapes + ["scalar"]) +# @pytest.mark.parametrize("Sigma_shape", shapes + ["scalar"]) +# @pytest.mark.parametrize("diagonal_Sigma", [True, False]) +# @pytest.mark.parametrize("diagonal_C", [True, False]) +# @pytest.mark.parametrize("diagonal_M", [True, False]) +@pytest.mark.parametrize( + "d,n,shape,m_shape,M_shape,mu_shape,C_shape,Sigma_shape,diagonal_Sigma,diagonal_C,diagonal_M", + tests, +) class TestLinearModel(object): def random( self, M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, ): if M_shape == "scalar": M = np.random.randn() - elif diag_M: + elif diagonal_M: M = np.random.randn(*M_shape, n) else: M = np.random.randn(*M_shape, d, n) @@ -47,7 +79,7 @@ def random( if C_shape == "scalar": C = np.random.randn() ** 2 - elif diag_C: + elif diagonal_C: C = np.random.randn(*C_shape, d) ** 2 else: C = np.random.randn(*C_shape, d, d) @@ -60,13 +92,15 @@ def random( if Sigma_shape == "scalar": Sigma = np.random.randn() ** 2 - elif diag_Sigma: + elif diagonal_Sigma: Sigma = np.random.randn(*Sigma_shape, n) ** 2 else: Sigma = np.random.randn(*Sigma_shape, n, n) Sigma = np.einsum("...ij,...kj->...ik", Sigma, Sigma) + n * np.eye(n) - model = LinearModel(M, m, C, mu, Sigma, shape, n, d) + model = LinearModel( + M, m, C, mu, Sigma, shape, n, d, diagonal_M, diagonal_C, diagonal_Sigma + ) assert model.d == d assert model.n == n assert np.all(model.M == M) @@ -76,67 +110,31 @@ def random( assert np.all(model.Sigma == Sigma) return model - def test_init( - self, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ): - model = self.random( - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ) - assert model.shape == np.broadcast_shapes( - shape, - np.shape(np.atleast_2d(model.M))[:-2], - np.shape(np.atleast_1d(model.m))[:-1], - np.shape(np.atleast_1d(model.mu))[:-1], 
- np.shape(np.atleast_2d(model.C))[:-2], - np.shape(np.atleast_2d(model.Sigma))[:-2], - ) - @pytest.mark.parametrize("theta_shape", shapes) def test_likelihood( self, theta_shape, M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, ): model = self.random( M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, @@ -149,26 +147,26 @@ def test_likelihood( def test_prior( self, M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, ): model = self.random( M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, @@ -182,26 +180,26 @@ def test_posterior( self, D_shape, M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, ): model = self.random( M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, @@ -214,26 +212,26 @@ def test_posterior( def test_evidence( self, M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, ): model = self.random( M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, @@ -245,26 +243,26 @@ def test_evidence( def test_joint( self, M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, ): model = self.random( M_shape, - diag_M, + diagonal_M, m_shape, C_shape, - diag_C, + diagonal_C, mu_shape, Sigma_shape, - diag_Sigma, + diagonal_Sigma, shape, n, d, @@ -274,251 +272,251 @@ def test_joint( assert dist.dim == model.n + model.d -@pytest.mark.parametrize("logA_shape", shapes) -class TestLinearMixtureModel(TestLinearModel): - def random( - self, - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ): - model = super().random( - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ) - logA = np.random.randn(*logA_shape) - model = LinearMixtureModel( - logA, model.M, model.m, model.C, model.mu, model.Sigma, shape, n, d - ) - assert np.all(model.logA == logA) - return model - - def test_init( - self, - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ) - assert model.shape == np.broadcast_shapes( - shape, - np.shape(np.atleast_2d(model.M))[:-2], - np.shape(np.atleast_1d(model.m))[:-1], - np.shape(np.atleast_1d(model.mu))[:-1], - np.shape(np.atleast_2d(model.C))[:-2], - np.shape(np.atleast_2d(model.Sigma))[:-2], - np.shape(model.logA), - ) - - @pytest.mark.parametrize("theta_shape", shapes) - def test_likelihood( - self, - theta_shape, - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diag_M, - m_shape, - 
C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ) - theta = np.random.randn(*theta_shape[:-1], n) - dist = model.likelihood(theta) - if model.shape != (): - assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) - assert dist.dim == model.d - - def test_prior( - self, - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ) - dist = model.prior() - assert dist.shape == model.shape - assert dist.dim == model.n - - @pytest.mark.parametrize("D_shape", shapes) - def test_posterior( - self, - D_shape, - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ) - D = np.random.randn(*D_shape[:-1], d) - dist = model.posterior(D) - if model.shape != (): - assert dist.shape == np.broadcast_shapes(model.shape, D_shape) - assert dist.dim == model.n - - def test_evidence( - self, - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ) - dist = model.evidence() - assert dist.shape == model.shape - assert dist.dim == model.d - - def test_joint( - self, - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diag_M, - m_shape, - C_shape, - diag_C, - mu_shape, - Sigma_shape, - diag_Sigma, - shape, - n, - d, - ) - dist = model.joint() - assert dist.shape == model.shape - assert dist.dim == model.n + model.d +# @pytest.mark.parametrize("logA_shape", shapes) +# class TestLinearMixtureModel(TestLinearModel): +# def random( +# self, +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ): +# model = super().random( +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ) +# logA = np.random.randn(*logA_shape) +# model = LinearMixtureModel( +# logA, model.M, model.m, model.C, model.mu, model.Sigma, shape, n, d +# ) +# assert np.all(model.logA == logA) +# return model +# +# def test_init( +# self, +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ): +# model = self.random( +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ) +# assert model.shape == np.broadcast_shapes( +# shape, +# np.shape(np.atleast_2d(model.M))[:-2], +# np.shape(np.atleast_1d(model.m))[:-1], +# np.shape(np.atleast_1d(model.mu))[:-1], +# np.shape(np.atleast_2d(model.C))[:-2], +# np.shape(np.atleast_2d(model.Sigma))[:-2], +# np.shape(model.logA), +# ) +# +# @pytest.mark.parametrize("theta_shape", shapes) +# def test_likelihood( +# self, +# theta_shape, +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# 
diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ): +# model = self.random( +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ) +# theta = np.random.randn(*theta_shape[:-1], n) +# dist = model.likelihood(theta) +# if model.shape != (): +# assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) +# assert dist.dim == model.d +# +# def test_prior( +# self, +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ): +# model = self.random( +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ) +# dist = model.prior() +# assert dist.shape == model.shape +# assert dist.dim == model.n +# +# @pytest.mark.parametrize("D_shape", shapes) +# def test_posterior( +# self, +# D_shape, +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ): +# model = self.random( +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ) +# D = np.random.randn(*D_shape[:-1], d) +# dist = model.posterior(D) +# if model.shape != (): +# assert dist.shape == np.broadcast_shapes(model.shape, D_shape) +# assert dist.dim == model.n +# +# def test_evidence( +# self, +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ): +# model = self.random( +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ) +# dist = model.evidence() +# assert dist.shape == model.shape +# assert dist.dim == model.d +# +# def test_joint( +# self, +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ): +# model = self.random( +# logA_shape, +# M_shape, +# diagonal_M, +# m_shape, +# C_shape, +# diagonal_C, +# mu_shape, +# Sigma_shape, +# diagonal_Sigma, +# shape, +# n, +# d, +# ) +# dist = model.joint() +# assert dist.shape == model.shape +# assert dist.dim == model.n + model.d From ac6541ee58ed21a47c6716f996ab699f25ce1dd0 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 14 Jan 2024 15:59:16 +0000 Subject: [PATCH 039/117] Mixture models passing --- lsbi/model_1.py | 34 +-- tests/test_model_1.py | 473 ++++++++++++++++++++---------------------- 2 files changed, 242 insertions(+), 265 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index ebf051d..2f97167 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -274,7 +274,7 @@ def _Sigma(self): return self.Sigma -class LinearMixtureModel(LinearModel): +class MixtureModel(LinearModel): """A linear mixture model. 
D|theta, A ~ N( m + M theta, C ) @@ -359,15 +359,7 @@ def from_joint(cls, means, covs, logA, n): @property def shape(self): """Shape of the distribution.""" - return np.broadcast_shapes( - np.array(self.logA).shape, - np.atleast_2d(self.M).shape[:-2], - np.atleast_1d(self.m).shape[:-1], - np.atleast_2d(self.C).shape[:-2], - np.atleast_1d(self.mu).shape[:-1], - np.atleast_2d(self.Sigma).shape[:-2], - self._shape, - ) + return np.broadcast_shapes(np.shape(self.logA), super().shape) @property def k(self): @@ -385,9 +377,11 @@ def likelihood(self, theta): ---------- theta : array_like, shape (n,) """ - dist = super().likelihood(theta[..., None, :]) + dist = super(self.__class__, self).likelihood(theta[..., None, :]) logA = self.prior().weights(theta) - return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) + return mixture_normal( + logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + ) def prior(self): """P(theta) as a scipy distribution object. @@ -396,7 +390,9 @@ def prior(self): A ~ categorical(exp(logA)) """ dist = super().prior() - return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) + return mixture_normal( + self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + ) def posterior(self, D): """P(theta|D) as a scipy distribution object. @@ -412,7 +408,9 @@ def posterior(self, D): """ dist = super().posterior(D[..., None, :]) logA = self.evidence().weights(D) - return mixture_normal(logA, dist.mean, dist.cov, dist.shape, dist.dim) + return mixture_normal( + logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + ) def evidence(self): """P(D) as a scipy distribution object. @@ -421,7 +419,9 @@ def evidence(self): A ~ categorical(exp(logA)) """ dist = super().evidence() - return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) + return mixture_normal( + self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + ) def joint(self): """P(D, theta) as a scipy distribution object. 
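# Rough usage sketch of the mixture interface above (assumed from the API in this patch,
# not part of the diff): a two-component mixture over a 2-d parameter space with 1-d data.
import numpy as np
from lsbi.model_1 import MixtureModel

mixture = MixtureModel(
    logA=np.log([0.7, 0.3]),
    M=np.random.randn(2, 1, 2),            # one (d, n) matrix per component
    mu=np.array([[-1.0, 0.0], [1.0, 0.0]]),
    Sigma=np.eye(2),
    C=0.1,
    n=2,
    d=1,
)
prior = mixture.prior()                    # mixture_normal weighted by exp(logA)
post = mixture.posterior(np.array([0.4]))  # component weights updated via the evidence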
@@ -432,7 +432,9 @@ def joint(self): A ~ categorical(exp(logA)) """ dist = super().joint() - return mixture_normal(self.logA, dist.mean, dist.cov, dist.shape, dist.dim) + return mixture_normal( + self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + ) class ReducedLinearModel(object): diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 0caf448..63f0d60 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from lsbi.model_1 import LinearMixtureModel, LinearModel +from lsbi.model_1 import LinearModel, MixtureModel shapes = [(2, 3), (3,), ()] dims = [1, 2, 4] @@ -108,6 +108,9 @@ def random( assert np.all(model.C == C) assert np.all(model.mu == mu) assert np.all(model.Sigma == Sigma) + assert model.diagonal_M == diagonal_M + assert model.diagonal_C == diagonal_C + assert model.diagonal_Sigma == diagonal_Sigma return model @pytest.mark.parametrize("theta_shape", shapes) @@ -272,251 +275,223 @@ def test_joint( assert dist.dim == model.n + model.d -# @pytest.mark.parametrize("logA_shape", shapes) -# class TestLinearMixtureModel(TestLinearModel): -# def random( -# self, -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ): -# model = super().random( -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ) -# logA = np.random.randn(*logA_shape) -# model = LinearMixtureModel( -# logA, model.M, model.m, model.C, model.mu, model.Sigma, shape, n, d -# ) -# assert np.all(model.logA == logA) -# return model -# -# def test_init( -# self, -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ): -# model = self.random( -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ) -# assert model.shape == np.broadcast_shapes( -# shape, -# np.shape(np.atleast_2d(model.M))[:-2], -# np.shape(np.atleast_1d(model.m))[:-1], -# np.shape(np.atleast_1d(model.mu))[:-1], -# np.shape(np.atleast_2d(model.C))[:-2], -# np.shape(np.atleast_2d(model.Sigma))[:-2], -# np.shape(model.logA), -# ) -# -# @pytest.mark.parametrize("theta_shape", shapes) -# def test_likelihood( -# self, -# theta_shape, -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ): -# model = self.random( -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ) -# theta = np.random.randn(*theta_shape[:-1], n) -# dist = model.likelihood(theta) -# if model.shape != (): -# assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) -# assert dist.dim == model.d -# -# def test_prior( -# self, -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ): -# model = self.random( -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ) -# dist = model.prior() -# assert dist.shape == model.shape -# assert dist.dim == model.n -# -# @pytest.mark.parametrize("D_shape", shapes) -# def test_posterior( -# self, -# 
D_shape, -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ): -# model = self.random( -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ) -# D = np.random.randn(*D_shape[:-1], d) -# dist = model.posterior(D) -# if model.shape != (): -# assert dist.shape == np.broadcast_shapes(model.shape, D_shape) -# assert dist.dim == model.n -# -# def test_evidence( -# self, -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ): -# model = self.random( -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ) -# dist = model.evidence() -# assert dist.shape == model.shape -# assert dist.dim == model.d -# -# def test_joint( -# self, -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ): -# model = self.random( -# logA_shape, -# M_shape, -# diagonal_M, -# m_shape, -# C_shape, -# diagonal_C, -# mu_shape, -# Sigma_shape, -# diagonal_Sigma, -# shape, -# n, -# d, -# ) -# dist = model.joint() -# assert dist.shape == model.shape -# assert dist.dim == model.n + model.d +@pytest.mark.parametrize("logA_shape", shapes) +class TestMixtureModel(TestLinearModel): + def random( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = super().random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + logA = np.random.randn(*logA_shape) + model = MixtureModel( + logA, + model.M, + model.m, + model.C, + model.mu, + model.Sigma, + shape, + n, + d, + diagonal_M, + diagonal_C, + diagonal_Sigma, + ) + assert np.all(model.logA == logA) + return model + + @pytest.mark.parametrize("theta_shape", shapes) + def test_likelihood( + self, + theta_shape, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + theta = np.random.randn(*theta_shape[:-1], n) + dist = model.likelihood(theta) + if model.shape != (): + assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) + assert dist.dim == model.d + + def test_prior( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.prior() + assert dist.shape == model.shape + assert dist.dim == model.n + + @pytest.mark.parametrize("D_shape", shapes) + def test_posterior( + self, + D_shape, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + 
n, + d, + ) + D = np.random.randn(*D_shape[:-1], d) + dist = model.posterior(D) + if model.shape != (): + assert dist.shape == np.broadcast_shapes(model.shape, D_shape) + assert dist.dim == model.n + + def test_evidence( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.evidence() + assert dist.shape == model.shape + assert dist.dim == model.d + + def test_joint( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.joint() + assert dist.shape == model.shape + assert dist.dim == model.n + model.d From 53d60b158f3b0ea00e6933a5acb77e50f216ff62 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 14 Jan 2024 17:12:10 +0000 Subject: [PATCH 040/117] Test suite now covers most things --- lsbi/model_1.py | 21 ++++++++------- tests/test_model_1.py | 63 +++++++++++++++++++++++++------------------ 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index 2f97167..b90e318 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -177,21 +177,22 @@ def posterior(self, D): if diagonal_Sigma: dim = min(self.n, self.d) + shape = np.broadcast_shapes(self.shape, values.shape[:-1]) C = np.atleast_1d(self.C)[..., :dim] M = np.atleast_1d(self.M)[..., :dim] - Sigma = np.ones(self.n) * self.Sigma + Sigma = np.broadcast_to(self.Sigma, shape + (self.n,)).copy() Sigma[..., :dim] = 1 / (1 / Sigma[..., :dim] + M**2 / C) - mu = np.broadcast_to(self.mu, values.shape[:-1] + (self.n,)).copy() + mu = np.broadcast_to(self.mu, shape + (self.n,)).copy() mu[..., :dim] = mu[..., :dim] + Sigma[..., :dim] * M / C * values[..., :dim] else: if self.diagonal_C: - invC = np.eye(self.d) / self.C[..., None, :] + invC = np.eye(self.d) / np.atleast_1d(self.C)[..., None, :] else: invC = inv(self.C) if self.diagonal_Sigma: - invSigma = np.eye(self.n) / self.Sigma[..., None, :] + invSigma = np.eye(self.n) / np.atleast_1d(self.Sigma)[..., None, :] else: invSigma = inv(self.Sigma) @@ -214,9 +215,9 @@ def evidence(self): if diagonal_Sigma: dim = min(self.n, self.d) - Sigma = self.C * np.ones(self.d) M = np.atleast_1d(self.M)[..., :dim] S = np.atleast_1d(self.Sigma)[..., :dim] + Sigma = np.broadcast_to(self.C, self.shape + (self.d,)).copy() Sigma[..., :dim] += S * M**2 else: Sigma = self._C + np.einsum( @@ -237,11 +238,11 @@ def joint(self): b = np.broadcast_to(prior.mean, self.shape + (self.n,)) mu = np.block([a, b]) if evidence.diagonal_cov: - A = evidence.cov[..., None, :] * np.eye(self.d) + A = np.atleast_1d(evidence.cov)[..., None, :] * np.eye(self.d) else: A = evidence.cov if prior.diagonal_cov: - D = prior.cov[..., None, :] * np.eye(self.n) + D = np.atleast_1d(prior.cov)[..., None, :] * np.eye(self.n) else: D = prior.cov B = np.einsum("...ja,...al->...jl", self._M, self._Sigma) @@ -255,21 +256,21 @@ def joint(self): @property def _M(self): if self.diagonal_M: - return self.M[..., None, :] * np.eye(self.d, self.n) + return np.atleast_1d(self.M)[..., None, :] * np.eye(self.d, self.n) else: return self.M @property def _C(self): if 
self.diagonal_C: - return self.C[..., None, :] * np.eye(self.d) + return np.atleast_1d(self.C)[..., None, :] * np.eye(self.d) else: return self.C @property def _Sigma(self): if self.diagonal_Sigma: - return self.Sigma[..., None, :] * np.eye(self.n) + return np.atleast_1d(self.Sigma)[..., None, :] * np.eye(self.n) else: return self.Sigma diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 63f0d60..83550f3 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -9,30 +9,35 @@ tests = [] for d in dims: for n in dims: - for shape in shapes: - m_shape = shape - M_shape = shape - mu_shape = shape - C_shape = shape - Sigma_shape = shape - for diagonal_Sigma in [True, False]: - for diagonal_C in [True, False]: - for diagonal_M in [True, False]: - tests.append( - ( - d, - n, - shape, - m_shape, - M_shape, - mu_shape, - C_shape, - Sigma_shape, - diagonal_Sigma, - diagonal_C, - diagonal_M, - ) + for diagonal_Sigma in [True, False]: + for diagonal_C in [True, False]: + for diagonal_M in [True, False]: + for base_shape in shapes + ["scalar"]: + shape = base_shape + m_shape = base_shape + M_shape = base_shape + mu_shape = base_shape + C_shape = base_shape + Sigma_shape = base_shape + base_test = ( + d, + n, + shape, + m_shape, + M_shape, + mu_shape, + C_shape, + Sigma_shape, + diagonal_Sigma, + diagonal_C, + diagonal_M, ) + for alt_shape in shapes + ["scalar"]: + for i in range(2, 8): + test = base_test[:i] + (alt_shape,) + base_test[i + 1 :] + if test[2] == "scalar": + continue + tests.append(test) # @pytest.mark.parametrize("d", dims) @@ -108,9 +113,15 @@ def random( assert np.all(model.C == C) assert np.all(model.mu == mu) assert np.all(model.Sigma == Sigma) - assert model.diagonal_M == diagonal_M - assert model.diagonal_C == diagonal_C - assert model.diagonal_Sigma == diagonal_Sigma + assert model.diagonal_M == diagonal_M or ( + M_shape == "scalar" and model.diagonal_M + ) + assert model.diagonal_C == diagonal_C or ( + C_shape == "scalar" and model.diagonal_C + ) + assert model.diagonal_Sigma == diagonal_Sigma or ( + Sigma_shape == "scalar" and model.diagonal_Sigma + ) return model @pytest.mark.parametrize("theta_shape", shapes) From 2e9ef6479a925b1e74890cc3e80fa3e77cb07fc3 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 14 Jan 2024 17:34:53 +0000 Subject: [PATCH 041/117] Minor renaming --- lsbi/model_1.py | 103 +++++++++++++----------------------------------- lsbi/stats_1.py | 6 +-- 2 files changed, 31 insertions(+), 78 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index b90e318..1fa76c0 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -125,16 +125,6 @@ def d(self): ] ) - @classmethod - def from_joint(cls, means, covs, n): - """Construct model from joint distribution.""" - mu = means[:, -n:] - Sigma = covs[:, -n:, -n:] - M = solve(Sigma, covs[:, -n:, :-n]).transpose(0, 2, 1) - m = means[:, :-n] - np.einsum("ija,ia->ij", M, mu) - C = covs[:, :-n, :-n] - np.einsum("ija,iab,ikb->ijk", M, Sigma, M) - return cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma) - def likelihood(self, theta): """P(D|theta) as a scipy distribution object. 
@@ -237,17 +227,11 @@ def joint(self): a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) b = np.broadcast_to(prior.mean, self.shape + (self.n,)) mu = np.block([a, b]) - if evidence.diagonal_cov: - A = np.atleast_1d(evidence.cov)[..., None, :] * np.eye(self.d) - else: - A = evidence.cov - if prior.diagonal_cov: - D = np.atleast_1d(prior.cov)[..., None, :] * np.eye(self.n) - else: - D = prior.cov - B = np.einsum("...ja,...al->...jl", self._M, self._Sigma) + A = self._de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) A = np.broadcast_to(A, self.shape + (self.d, self.d)) + D = self._de_diagonalise(prior.cov, prior.diagonal_cov, self.n) D = np.broadcast_to(D, self.shape + (self.n, self.n)) + B = np.einsum("...ja,...al->...jl", self._M, self._Sigma) B = np.broadcast_to(B, self.shape + (self.d, self.n)) C = np.moveaxis(B, -1, -2) Sigma = np.block([[A, B], [C, D]]) @@ -255,24 +239,21 @@ def joint(self): @property def _M(self): - if self.diagonal_M: - return np.atleast_1d(self.M)[..., None, :] * np.eye(self.d, self.n) - else: - return self.M + return self._de_diagonalise(self.M, self.diagonal_M, self.d, self.n) @property def _C(self): - if self.diagonal_C: - return np.atleast_1d(self.C)[..., None, :] * np.eye(self.d) - else: - return self.C + return self._de_diagonalise(self.C, self.diagonal_C, self.d) @property def _Sigma(self): - if self.diagonal_Sigma: - return np.atleast_1d(self.Sigma)[..., None, :] * np.eye(self.n) + return self._de_diagonalise(self.Sigma, self.diagonal_Sigma, self.n) + + def _de_diagonalise(self, x, diagonal, *args): + if diagonal: + return np.atleast_1d(x)[..., None, :] * np.eye(*args) else: - return self.Sigma + return x class MixtureModel(LinearModel): @@ -327,35 +308,9 @@ class MixtureModel(LinearModel): Number of data dimensions, defaults to automatically inferred value """ - def __init__( - self, - logA=1, - M=1, - m=0, - C=1, - mu=0, - Sigma=1, - shape=(), - n=1, - d=1, - diagonal_M=False, - diagonal_C=False, - diagonal_Sigma=False, - ): + def __init__(self, logA=1, *args): + super().__init__(*args) self.logA = logA - super().__init__( - M, m, C, mu, Sigma, shape, n, d, diagonal_M, diagonal_C, diagonal_Sigma - ) - - @classmethod - def from_joint(cls, means, covs, logA, n): - """Construct model from joint distribution.""" - mu = means[:, -n:] - Sigma = covs[:, -n:, -n:] - M = solve(Sigma, covs[:, -n:, :-n]).transpose(0, 2, 1) - m = means[:, :-n] - np.einsum("ija,ia->ij", M, mu) - C = covs[:, :-n, :-n] - np.einsum("ija,iab,ikb->ijk", M, Sigma, M) - return cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma, logA=logA) @property def shape(self): @@ -379,10 +334,9 @@ def likelihood(self, theta): theta : array_like, shape (n,) """ dist = super(self.__class__, self).likelihood(theta[..., None, :]) - logA = self.prior().weights(theta) - return mixture_normal( - logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov - ) + dist.__class__ = mixture_normal + dist.logA = self.prior()._logA(theta) + return dist def prior(self): """P(theta) as a scipy distribution object. @@ -391,9 +345,9 @@ def prior(self): A ~ categorical(exp(logA)) """ dist = super().prior() - return mixture_normal( - self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov - ) + dist.__class__ = mixture_normal + dist.logA = self.logA + return dist def posterior(self, D): """P(theta|D) as a scipy distribution object. 
@@ -408,10 +362,9 @@ def posterior(self, D): D : array_like, shape (d,) """ dist = super().posterior(D[..., None, :]) - logA = self.evidence().weights(D) - return mixture_normal( - logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov - ) + dist.__class__ = mixture_normal + dist.logA = self.evidence()._logA(D) + return dist def evidence(self): """P(D) as a scipy distribution object. @@ -420,9 +373,9 @@ def evidence(self): A ~ categorical(exp(logA)) """ dist = super().evidence() - return mixture_normal( - self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov - ) + dist.__class__ = mixture_normal + dist.logA = self.logA + return dist def joint(self): """P(D, theta) as a scipy distribution object. @@ -433,9 +386,9 @@ def joint(self): A ~ categorical(exp(logA)) """ dist = super().joint() - return mixture_normal( - self.logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov - ) + dist.__class__ = mixture_normal + dist.logA = self.logA + return dist class ReducedLinearModel(object): diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index f817b7d..c4e9864 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -377,10 +377,10 @@ def condition(self, indices, values): conditioned distribution, shape (*shape, len(indices)) """ dist = super().condition(indices, values[..., None, :]) - dist.logA = self.marginalise(self._bar(indices)).weights(values) + dist.logA = self.marginalise(self._bar(indices))._logA(values) return dist - def weights(self, values): + def _logA(self, values): """Compute the conditional weights of the mixture. Parameters @@ -392,7 +392,7 @@ def weights(self, values): Returns ------- - weights : array_like shape (*shape, n) + _logA : array_like shape (*shape, n) """ copy = deepcopy(self) copy.mean = copy.mean - values[..., None, :] From 0724b1a80b95c0839765f4a1a6a8ac639090b7a7 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Mon, 15 Jan 2024 09:00:54 +0000 Subject: [PATCH 042/117] Need to reduce sensitivity of ks checks --- lsbi/stats_1.py | 23 +++++--- tests/test_model_1.py | 11 ---- tests/test_stats_1.py | 119 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 132 insertions(+), 21 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index c4e9864..1954c6b 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -89,7 +89,7 @@ def logpdf(self, x): dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean if self.diagonal_cov: chi2 = (dx**2 / self.cov).sum(axis=-1) - norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum() / 2 + norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum(axis=-1) / 2 else: chi2 = np.einsum("...j,...jk,...k->...", dx, inv(self.cov), dx) norm = -logdet(2 * np.pi * self.cov) / 2 @@ -309,6 +309,13 @@ def shape(self): """Shape of the distribution.""" return np.broadcast_shapes(np.shape(self.logA), super().shape) + @property + def k(self): + """Number of components.""" + if self.shape == (): + return 1 + return self.shape[-1] + def logpdf(self, x): """Log of the probability density function. @@ -326,8 +333,7 @@ def logpdf(self, x): logpdf = super().logpdf(x) if self.shape == (): return logpdf - logA = self.logA - logsumexp(self.logA, axis=-1)[..., None] - return logsumexp(logpdf + logA, axis=-1) + return logsumexp(logpdf + self._logA, axis=-1) def rvs(self, size=()): """Draw random samples from the distribution. 
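A small self-contained sketch of the mixture log-density computed by logpdf above, i.e. a logsumexp over component log-densities plus normalised log-weights; only numpy and scipy are assumed and the numbers are illustrative:

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal as mvn

means = np.array([[0.0, 0.0], [3.0, 3.0]])   # k=2 components in dim=2
covs = np.array([np.eye(2), 2 * np.eye(2)])
logA = np.log(np.array([0.3, 0.7]))          # unnormalised log-weights

x = np.array([1.0, 1.0])
logA = logA - logsumexp(logA)                # normalise the weights
component_logpdf = np.array([mvn(m, c).logpdf(x) for m, c in zip(means, covs)])
mixture_logpdf = logsumexp(component_logpdf + logA)

# cross-check against a direct (less numerically stable) weighted sum
direct = np.log(np.sum(np.exp(logA) * np.exp(component_logpdf)))
assert np.isclose(mixture_logpdf, direct)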
@@ -343,9 +349,7 @@ def rvs(self, size=()): if self.shape == (): return super().rvs(size=size) size = np.atleast_1d(np.array(size, dtype=int)) - p = np.exp(self.logA - logsumexp(self.logA, axis=-1)[..., None]) - p = np.broadcast_to(p, self.shape) - i = choice(size, p) + i = choice(size, np.exp(self._logA)) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) x = np.random.randn(*size, *self.shape[:-1], self.dim) @@ -461,3 +465,10 @@ def f(t): return x else: return theta + + @property + def _logA(self): + """Log of the mixing weights.""" + logA = np.broadcast_to(self.logA, self.shape).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + return logA diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 83550f3..53ea160 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -40,17 +40,6 @@ tests.append(test) -# @pytest.mark.parametrize("d", dims) -# @pytest.mark.parametrize("n", dims) -# @pytest.mark.parametrize("shape", shapes) -# @pytest.mark.parametrize("m_shape", shapes + ["scalar"]) -# @pytest.mark.parametrize("mu_shape", shapes + ["scalar"]) -# @pytest.mark.parametrize("M_shape", shapes + ["scalar"]) -# @pytest.mark.parametrize("C_shape", shapes + ["scalar"]) -# @pytest.mark.parametrize("Sigma_shape", shapes + ["scalar"]) -# @pytest.mark.parametrize("diagonal_Sigma", [True, False]) -# @pytest.mark.parametrize("diagonal_C", [True, False]) -# @pytest.mark.parametrize("diagonal_M", [True, False]) @pytest.mark.parametrize( "d,n,shape,m_shape,M_shape,mu_shape,C_shape,Sigma_shape,diagonal_Sigma,diagonal_C,diagonal_M", tests, diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 5892027..c7677d8 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -1,5 +1,9 @@ import numpy as np import pytest +import scipy +from numpy.testing import assert_allclose +from scipy.special import logsumexp +from scipy.stats import multivariate_normal as scipy_multivariate_normal from lsbi.stats_1 import mixture_normal, multivariate_normal @@ -79,12 +83,46 @@ def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( + -1, dist.dim + ) + if dist.diagonal_cov: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( + -1, dist.dim + ) + else: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( + -1, dist.dim, dist.dim + ) + + flat_dist = [ + scipy_multivariate_normal(m, c, allow_singular=True) + for (m, c) in zip(mean, cov) + ] + flat_logpdf = np.array([d.logpdf(x) for d in flat_dist]) + flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) + + assert_allclose(logpdf, flat_logpdf) + @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - x = dist.rvs(size) - assert x.shape == size + dist.shape + (dim,) + rvs = dist.rvs(size) + assert rvs.shape == size + dist.shape + (dim,) + + if dist.diagonal_cov: + chi2 = ((rvs - dist.mean) ** 2 / dist.cov).sum(axis=-1).flatten() + else: + chi2 = np.einsum( + "...j,...jk,...k->...", + rvs - dist.mean, + np.linalg.inv(dist.cov), + rvs - dist.mean, + ).flatten() + assert ( + scipy.stats.kstest(chi2, scipy.stats.chi2(df=dist.dim).cdf).pvalue > 1e-10 + ) 
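A standalone sketch of the distributional check used in test_rvs above: squared Mahalanobis distances of well-formed multivariate-normal draws follow a chi-squared distribution with dim degrees of freedom, which a Kolmogorov-Smirnov test can verify; only numpy and scipy are assumed:

import numpy as np
import scipy.stats

dim, size = 3, 5000
mean = np.random.randn(dim)
L = np.tril(np.random.randn(dim, dim)) + dim * np.eye(dim)
cov = L @ L.T                                   # a valid covariance matrix

rvs = np.random.multivariate_normal(mean, cov, size)
dx = rvs - mean
# squared Mahalanobis distance for each sample
chi2 = np.einsum("ij,jk,ik->i", dx, np.linalg.inv(cov), dx)

pvalue = scipy.stats.kstest(chi2, scipy.stats.chi2(df=dim).cdf).pvalue
assert pvalue > 1e-5                            # loose threshold, as in the tests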
@pytest.mark.parametrize( "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", @@ -200,6 +238,9 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) + # TODO Sort out broadcasting + # assert_allclose(y, dist.bijector(x)) + @pytest.mark.parametrize("logA_shape", shapes) class TestMixtureNormal(TestMultivariateNormal): @@ -224,14 +265,81 @@ def test_logpdf( logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape[:-1] + logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( + -1, dist.k, dist.dim + ) + if dist.diagonal_cov: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( + -1, dist.k, dist.dim + ) + else: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( + -1, dist.k, dist.dim, dist.dim + ) + + flat_dist = [ + [ + scipy_multivariate_normal(m, c, allow_singular=True) + for (m, c) in zip(ms, cs) + ] + for (ms, cs) in zip(mean, cov) + ] + flat_logpdf = np.array( + [ + logsumexp([la + d.logpdf(x) for la, d in zip(las, ds)], axis=0) + for las, ds in zip(logA, flat_dist) + ] + ) + flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) + assert_allclose(logpdf, flat_logpdf) + @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_rvs( self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size ): dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - x = dist.rvs(size) - assert x.shape == size + dist.shape[:-1] + (dim,) + rvs = dist.rvs(size) + assert rvs.shape == size + dist.shape[:-1] + (dim,) + + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): + size = 1000 + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + rvs = dist.rvs(size) + logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + p = np.exp(logA) + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( + -1, dist.k, dist.dim + ) + if dist.diagonal_cov: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( + -1, dist.k, dist.dim + ) + else: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( + -1, dist.k, dist.dim, dist.dim + ) + + rvs_ = np.array( + [ + [ + scipy_multivariate_normal(ms[j], cs[j], allow_singular=True).rvs() + for j in np.random.choice(len(ms), p=ps, size=size) + ] + for ms, cs, ps in zip(mean, cov, p) + ] + ) + rvs = np.moveaxis(rvs, -2, 0).reshape(-1, size, dim) + for a, b in zip(rvs, rvs_): + for i in range(dim): + if dim == 1: + assert scipy.stats.kstest(a[:, i], b).pvalue > 1e-10 + else: + assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > 1e-10 @pytest.mark.parametrize( "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", @@ -364,3 +472,6 @@ def test_bijector( y = np.random.rand(*x_shape[:-1], dim) x = dist.bijector(y, inverse=True) assert x.shape == np.broadcast_shapes(y.shape, dist.shape[:-1] + (dim,)) + + # TODO Sort out broadcasting + # assert_allclose(y, dist.bijector(x)) From 46b3f89782c96d150ed5c9958ad28bf0439171b4 Mon Sep 17 00:00:00 2001 From: Will Handley Date: 
Wed, 17 Jan 2024 16:22:53 +0000 Subject: [PATCH 043/117] Fixed bug in rvs for mixtures with new tests --- lsbi/stats_1.py | 18 +++++++++--------- tests/test_stats_1.py | 38 ++++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 1954c6b..3a79c79 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -333,7 +333,9 @@ def logpdf(self, x): logpdf = super().logpdf(x) if self.shape == (): return logpdf - return logsumexp(logpdf + self._logA, axis=-1) + logA = np.broadcast_to(self.logA, self.shape).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + return logsumexp(logpdf + logA, axis=-1) def rvs(self, size=()): """Draw random samples from the distribution. @@ -349,7 +351,12 @@ def rvs(self, size=()): if self.shape == (): return super().rvs(size=size) size = np.atleast_1d(np.array(size, dtype=int)) - i = choice(size, np.exp(self._logA)) + logA = np.broadcast_to(self.logA, self.shape).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + p = np.exp(logA) + cump = np.cumsum(p, axis=-1) + u = np.random.rand(*size, *p.shape[:-1]) + i = np.argmax(np.array(u)[..., None] < cump, axis=-1) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) x = np.random.randn(*size, *self.shape[:-1], self.dim) @@ -465,10 +472,3 @@ def f(t): return x else: return theta - - @property - def _logA(self): - """Log of the mixing weights.""" - logA = np.broadcast_to(self.logA, self.shape).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) - return logA diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index c7677d8..e3cb560 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -51,6 +51,23 @@ ) +def flatten(dist): + """Convert a multivariate_normal to a list of scipy.stats.multivariate_normal""" + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape(-1, dist.dim) + if dist.diagonal_cov: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape(-1, dist.dim) + else: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( + -1, dist.dim, dist.dim + ) + + flat_dist = [ + scipy_multivariate_normal(m, c, allow_singular=True) + for (m, c) in zip(mean, cov) + ] + return flat_dist + + class TestMultivariateNormal(object): cls = multivariate_normal @@ -83,25 +100,9 @@ def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape - mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( - -1, dist.dim - ) - if dist.diagonal_cov: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( - -1, dist.dim - ) - else: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( - -1, dist.dim, dist.dim - ) - - flat_dist = [ - scipy_multivariate_normal(m, c, allow_singular=True) - for (m, c) in zip(mean, cov) - ] + flat_dist = flatten(dist) flat_logpdf = np.array([d.logpdf(x) for d in flat_dist]) flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) - assert_allclose(logpdf, flat_logpdf) @pytest.mark.parametrize("size", sizes) @@ -297,7 +298,7 @@ def test_logpdf( @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs( + def test_rvs_shape( self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size ): dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) @@ 
-334,6 +335,7 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): ] ) rvs = np.moveaxis(rvs, -2, 0).reshape(-1, size, dim) + for a, b in zip(rvs, rvs_): for i in range(dim): if dim == 1: From 7508774d477323c6e3580548fbecfe5f9e8aebf6 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 28 Jan 2024 11:30:04 +0000 Subject: [PATCH 044/117] stats now passing --- lsbi/model_1.py | 23 +++--- lsbi/stats_1.py | 57 +++++++++++++ tests/test_model_1.py | 187 +++++++++++++++++++++++++++++++++++++++++- tests/test_stats_1.py | 20 +++-- 4 files changed, 266 insertions(+), 21 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index 1fa76c0..77388a2 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -6,6 +6,13 @@ from lsbi.utils import logdet, matrix +def _de_diagonalise(x, diagonal, *args): + if diagonal: + return np.atleast_1d(x)[..., None, :] * np.eye(*args) + else: + return x + + class LinearModel(object): """A multilinear model. @@ -227,9 +234,9 @@ def joint(self): a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) b = np.broadcast_to(prior.mean, self.shape + (self.n,)) mu = np.block([a, b]) - A = self._de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) + A = _de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) A = np.broadcast_to(A, self.shape + (self.d, self.d)) - D = self._de_diagonalise(prior.cov, prior.diagonal_cov, self.n) + D = _de_diagonalise(prior.cov, prior.diagonal_cov, self.n) D = np.broadcast_to(D, self.shape + (self.n, self.n)) B = np.einsum("...ja,...al->...jl", self._M, self._Sigma) B = np.broadcast_to(B, self.shape + (self.d, self.n)) @@ -239,21 +246,15 @@ def joint(self): @property def _M(self): - return self._de_diagonalise(self.M, self.diagonal_M, self.d, self.n) + return _de_diagonalise(self.M, self.diagonal_M, self.d, self.n) @property def _C(self): - return self._de_diagonalise(self.C, self.diagonal_C, self.d) + return _de_diagonalise(self.C, self.diagonal_C, self.d) @property def _Sigma(self): - return self._de_diagonalise(self.Sigma, self.diagonal_Sigma, self.n) - - def _de_diagonalise(self, x, diagonal, *args): - if diagonal: - return np.atleast_1d(x)[..., None, :] * np.eye(*args) - else: - return x + return _de_diagonalise(self.Sigma, self.diagonal_Sigma, self.n) class MixtureModel(LinearModel): diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 3a79c79..d4d00b0 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -95,6 +95,21 @@ def logpdf(self, x): norm = -logdet(2 * np.pi * self.cov) / 2 return norm - chi2 / 2 + def pdf(self, x): + """Probability density function. + + Parameters + ---------- + x : array_like, shape (*size, dim) + Points at which to evaluate the probability density function. + + Returns + ------- + pdf : array_like, shape (*size, *shape) + Probability density function evaluated at x. + """ + return np.exp(self.logpdf(x)) + def rvs(self, size=()): """Draw random samples from the distribution. @@ -271,6 +286,43 @@ def bijector(self, x, inverse=False): L = cholesky(self.cov) return mean + np.einsum("...jk,...k->...j", L, y) + def __getitem__(self, arg): + """Access a subset of the distributions. + + Parameters + ---------- + arg : int or slice or tuple of ints or tuples + Indices to access. 
+ + Returns + ------- + dist : distribution + A subset of the distribution + + Examples + -------- + >>> dist = multivariate_normal(shape=(2, 3), dim=4) + >>> dist.shape + (2, 3) + >>> dist.dim + 4 + >>> dist[0].shape + (3,) + >>> dist[0, 0].shape + () + >>> dist[:, 0].shape + (2,) + """ + dist = deepcopy(self) + dist.mean = np.broadcast_to(self.mean, (*self.shape, self.dim))[arg] + if self.diagonal_cov: + dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim))[arg] + else: + dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim, self.dim))[arg] + dist._shape = dist.mean.shape[:-1] + dist._dim = dist.mean.shape[-1] + return dist + class mixture_normal(multivariate_normal): """Mixture of multivariate normal distributions. @@ -472,3 +524,8 @@ def f(t): return x else: return theta + + def __getitem__(self, arg): # noqa: D105 + dist = super().__getitem__(arg) + dist.logA = np.broadcast_to(self.logA, self.shape)[arg] + return dist diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 53ea160..159a48c 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -1,7 +1,8 @@ import numpy as np import pytest +from numpy.testing import assert_allclose -from lsbi.model_1 import LinearModel, MixtureModel +from lsbi.model_1 import LinearModel, MixtureModel, _de_diagonalise shapes = [(2, 3), (3,), ()] dims = [1, 2, 4] @@ -274,6 +275,96 @@ def test_joint( assert dist.shape == model.shape assert dist.dim == model.n + model.d + def test_marginal_conditional( + self, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + + i = np.arange(d + n)[-n:] + model_1 = model.evidence() + model_2 = model.joint().marginalise(i) + assert_allclose(model_1.mean, model_2.mean) + assert_allclose(model_1.cov, model_2.cov) + + theta = model.prior().rvs() + model_1 = model.likelihood(theta) + model_2 = model.joint().condition(i, theta) + assert_allclose(model_1.mean, model_2.mean) + assert_allclose(model_1.cov, model_2.cov) + + i = np.arange(d + n)[:d] + model_1 = model.prior() + model_2 = model.joint().marginalise(i) + assert_allclose(model_1.mean, model_2.mean) + assert_allclose( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + ) + + D = model.evidence().rvs() + model_1 = model.posterior(D) + model_2 = model.joint().condition(i, D) + assert_allclose(model_1.mean, model_2.mean) + assert_allclose( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + ) + + def test_bayes_theorem( + self, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + theta = model.prior().rvs() + D = model.evidence().rvs() + assert_allclose( + model.posterior(D).logpdf(theta) + model.evidence().logpdf(D), + model.likelihood(theta).logpdf(D) + model.prior().logpdf(theta), + ) + @pytest.mark.parametrize("logA_shape", shapes) class TestMixtureModel(TestLinearModel): @@ -495,3 +586,97 @@ def test_joint( dist = model.joint() assert dist.shape == model.shape assert dist.dim == model.n + model.d + + def test_marginal_conditional( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, 
+ Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + + i = np.arange(d + n)[-n:] + model_1 = model.evidence() + model_2 = model.joint().marginalise(i) + assert_allclose(model_1.mean, model_2.mean) + assert_allclose(model_1.cov, model_2.cov) + + theta = model.prior().rvs() + model_1 = model.likelihood(theta) + model_2 = model.joint().condition(i, theta) + assert_allclose(model_1.mean, model_2.mean) + assert_allclose(model_1.cov, model_2.cov) + + i = np.arange(d + n)[:d] + model_1 = model.prior() + model_2 = model.joint().marginalise(i) + assert_allclose(model_1.mean, model_2.mean) + assert_allclose( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + ) + + D = model.evidence().rvs() + model_1 = model.posterior(D) + model_2 = model.joint().condition(i, D) + assert_allclose(model_1.mean, model_2.mean) + assert_allclose( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + ) + + def test_bayes_theorem( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + theta = model.prior().rvs() + D = model.evidence().rvs() + assert_allclose( + model.posterior(D).logpdf(theta) + model.evidence().logpdf(D), + model.likelihood(theta).logpdf(D) + model.prior().logpdf(theta), + ) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index e3cb560..34dcbcf 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -121,9 +121,7 @@ def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): np.linalg.inv(dist.cov), rvs - dist.mean, ).flatten() - assert ( - scipy.stats.kstest(chi2, scipy.stats.chi2(df=dist.dim).cdf).pvalue > 1e-10 - ) + assert scipy.stats.kstest(chi2, scipy.stats.chi2(df=dist.dim).cdf).pvalue > 1e-5 @pytest.mark.parametrize( "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", @@ -239,8 +237,9 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) - # TODO Sort out broadcasting - # assert_allclose(y, dist.bijector(x)) + x = np.random.rand(*x_shape, dim) + y = dist.bijector(x) + assert_allclose(np.broadcast_to(x, y.shape), dist.bijector(y, inverse=True)) @pytest.mark.parametrize("logA_shape", shapes) @@ -339,9 +338,9 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): for a, b in zip(rvs, rvs_): for i in range(dim): if dim == 1: - assert scipy.stats.kstest(a[:, i], b).pvalue > 1e-10 + assert scipy.stats.kstest(a[:, i], b).pvalue > 1e-5 else: - assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > 1e-10 + assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > 1e-5 @pytest.mark.parametrize( "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", @@ -475,5 +474,8 @@ def test_bijector( x = dist.bijector(y, inverse=True) assert x.shape == np.broadcast_shapes(y.shape, dist.shape[:-1] + (dim,)) - # TODO Sort out broadcasting - # assert_allclose(y, dist.bijector(x)) + x = np.random.rand(*x_shape[:-1], dim) + y = dist.bijector(x) + assert_allclose( + np.broadcast_to(x, y.shape), dist.bijector(y, inverse=True), 
atol=1e-4 + ) From 263b9530241c19039e310854142de84fdf7420cb Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 3 Feb 2024 00:36:38 +0000 Subject: [PATCH 045/117] rvs now testing and fast --- tests/test_stats_1.py | 47 +++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py index 34dcbcf..84d581d 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -107,21 +107,41 @@ def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): + def test_rvs_shape(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) rvs = dist.rvs(size) assert rvs.shape == size + dist.shape + (dim,) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov): + size = 100 + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + rvs = dist.rvs(size) + + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( + -1, dist.dim + ) if dist.diagonal_cov: - chi2 = ((rvs - dist.mean) ** 2 / dist.cov).sum(axis=-1).flatten() + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( + -1, dist.dim + ) else: - chi2 = np.einsum( - "...j,...jk,...k->...", - rvs - dist.mean, - np.linalg.inv(dist.cov), - rvs - dist.mean, - ).flatten() - assert scipy.stats.kstest(chi2, scipy.stats.chi2(df=dist.dim).cdf).pvalue > 1e-5 + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( + -1, dist.dim, dist.dim + ) + + rvs_ = np.array( + [ + scipy_multivariate_normal(ms, cs, allow_singular=True).rvs(size) + for ms, cs in zip(mean, cov) + ] + ).reshape(-1, size, dim) + + rvs = np.moveaxis(rvs.reshape(size, -1, dim), 1, 0) + + for a, b in zip(rvs, rvs_): + for i in range(dim): + assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > 1e-5 @pytest.mark.parametrize( "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", @@ -306,7 +326,7 @@ def test_rvs_shape( @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): - size = 1000 + size = 100 dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) rvs = dist.rvs(size) logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() @@ -332,15 +352,12 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): ] for ms, cs, ps in zip(mean, cov, p) ] - ) + ).reshape(-1, size, dim) rvs = np.moveaxis(rvs, -2, 0).reshape(-1, size, dim) for a, b in zip(rvs, rvs_): for i in range(dim): - if dim == 1: - assert scipy.stats.kstest(a[:, i], b).pvalue > 1e-5 - else: - assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > 1e-5 + assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > 1e-5 @pytest.mark.parametrize( "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", From c68e8e781d6af1668287092baee3857bac0faa55 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 3 Feb 2024 00:37:45 +0000 Subject: [PATCH 046/117] Lowered pvalue --- tests/test_stats_1.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_stats_1.py 
b/tests/test_stats_1.py index 84d581d..c340d67 100644 --- a/tests/test_stats_1.py +++ b/tests/test_stats_1.py @@ -10,6 +10,7 @@ shapes = [(2, 3), (3,), ()] sizes = [(6, 5), (5,), ()] dims = [1, 2, 4] +pvalue = 1e-7 tests = [] A_tests = [] @@ -141,7 +142,7 @@ def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov): for a, b in zip(rvs, rvs_): for i in range(dim): - assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > 1e-5 + assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > pvalue @pytest.mark.parametrize( "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", @@ -357,7 +358,7 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): for a, b in zip(rvs, rvs_): for i in range(dim): - assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > 1e-5 + assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > pvalue @pytest.mark.parametrize( "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", From ced80f6e1195e8556ba53512377b1f8516a8769a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 3 Feb 2024 22:59:01 +0000 Subject: [PATCH 047/117] Added a broadcast argument to logpdf --- lsbi/stats_1.py | 20 ++++++++++++++------ tests/test_model_1.py | 14 ++++++++++---- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index d4d00b0..6a82244 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -69,7 +69,7 @@ def dim(self): ] ) - def logpdf(self, x): + def logpdf(self, x, broadcast=False): """Log of the probability density function. Parameters @@ -77,6 +77,8 @@ def logpdf(self, x): x : array_like, shape (*size, dim) Points at which to evaluate the log of the probability density function. + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. Returns ------- @@ -84,9 +86,12 @@ def logpdf(self, x): Log of the probability density function evaluated at x. """ x = np.array(x) - size = x.shape[:-1] - mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) - dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean + if broadcast: + dx = x - self.mean + else: + size = x.shape[:-1] + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) + dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean if self.diagonal_cov: chi2 = (dx**2 / self.cov).sum(axis=-1) norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum(axis=-1) / 2 @@ -368,7 +373,7 @@ def k(self): return 1 return self.shape[-1] - def logpdf(self, x): + def logpdf(self, x, broadcast=False): """Log of the probability density function. Parameters @@ -377,12 +382,15 @@ def logpdf(self, x): Points at which to evaluate the log of the probability density function. + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. + Returns ------- logpdf : array_like, shape (*size, *shape[:-1]) Log of the probability density function evaluated at x. 
""" - logpdf = super().logpdf(x) + logpdf = super().logpdf(x, broadcast=broadcast) if self.shape == (): return logpdf logA = np.broadcast_to(self.logA, self.shape).copy() diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 159a48c..3ba3323 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -307,13 +307,17 @@ def test_marginal_conditional( model_1 = model.evidence() model_2 = model.joint().marginalise(i) assert_allclose(model_1.mean, model_2.mean) - assert_allclose(model_1.cov, model_2.cov) + assert_allclose( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + ) theta = model.prior().rvs() model_1 = model.likelihood(theta) model_2 = model.joint().condition(i, theta) assert_allclose(model_1.mean, model_2.mean) - assert_allclose(model_1.cov, model_2.cov) + assert_allclose( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + ) i = np.arange(d + n)[:d] model_1 = model.prior() @@ -361,8 +365,10 @@ def test_bayes_theorem( theta = model.prior().rvs() D = model.evidence().rvs() assert_allclose( - model.posterior(D).logpdf(theta) + model.evidence().logpdf(D), - model.likelihood(theta).logpdf(D) + model.prior().logpdf(theta), + model.posterior(D).logpdf(theta, broadcast=True) + + model.evidence().logpdf(D, broadcast=True), + model.likelihood(theta).logpdf(D, broadcast=True) + + model.prior().logpdf(theta, broadcast=True), ) From f057566db786dacb47d0e8e43b3db5e5df59e2ec Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sat, 3 Feb 2024 23:51:18 +0000 Subject: [PATCH 048/117] Broadcasting now working --- lsbi/model_1.py | 10 +++-- lsbi/stats_1.py | 32 ++++--------- tests/test_model_1.py | 101 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 97 insertions(+), 46 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index 77388a2..1567f1d 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -334,9 +334,10 @@ def likelihood(self, theta): ---------- theta : array_like, shape (n,) """ - dist = super(self.__class__, self).likelihood(theta[..., None, :]) + theta = np.array(theta)[..., None, :] + dist = super().likelihood(theta) dist.__class__ = mixture_normal - dist.logA = self.prior()._logA(theta) + dist.logA = self.prior().logpdf(theta, broadcast=True, joint=True) return dist def prior(self): @@ -362,9 +363,10 @@ def posterior(self, D): ---------- D : array_like, shape (d,) """ - dist = super().posterior(D[..., None, :]) + D = np.array(D)[..., None, :] + dist = super().posterior(D) dist.__class__ = mixture_normal - dist.logA = self.evidence()._logA(D) + dist.logA = self.evidence().logpdf(D, broadcast=True, joint=True) return dist def evidence(self): diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index 6a82244..fd50d91 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -373,7 +373,7 @@ def k(self): return 1 return self.shape[-1] - def logpdf(self, x, broadcast=False): + def logpdf(self, x, broadcast=False, joint=False): """Log of the probability density function. 
Parameters @@ -395,6 +395,8 @@ def logpdf(self, x, broadcast=False): return logpdf logA = np.broadcast_to(self.logA, self.shape).copy() logA -= logsumexp(logA, axis=-1, keepdims=True) + if joint: + return logpdf + logA return logsumexp(logpdf + logA, axis=-1) def rvs(self, size=()): @@ -447,31 +449,13 @@ def condition(self, indices, values): ------- conditioned distribution, shape (*shape, len(indices)) """ - dist = super().condition(indices, values[..., None, :]) - dist.logA = self.marginalise(self._bar(indices))._logA(values) + values = np.array(values)[..., None, :] + dist = super().condition(indices, values) + dist.__class__ = mixture_normal + marg = self.marginalise(self._bar(indices)) + dist.logA = marg.logpdf(values, broadcast=True, joint=True) return dist - def _logA(self, values): - """Compute the conditional weights of the mixture. - - Parameters - ---------- - values : array_like shape (..., dim) - Values to condition on. - - where self.shape[:-1] is broadcastable to ... - - Returns - ------- - _logA : array_like shape (*shape, n) - """ - copy = deepcopy(self) - copy.mean = copy.mean - values[..., None, :] - logA = super(copy.__class__, copy).logpdf(np.zeros(copy.dim)) - logA -= logsumexp(logA, axis=-1)[..., None] - logA += self.logA - return logA - def bijector(self, x, inverse=False): """Bijector between U([0, 1])^d and the distribution. diff --git a/tests/test_model_1.py b/tests/test_model_1.py index 3ba3323..a61214d 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -302,37 +302,66 @@ def test_marginal_conditional( n, d, ) + atol = 1e-5 i = np.arange(d + n)[-n:] model_1 = model.evidence() model_2 = model.joint().marginalise(i) - assert_allclose(model_1.mean, model_2.mean) assert_allclose( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol + ) + assert_allclose( + np.broadcast_to( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov.shape, + ), + model_2.cov, + atol=atol, ) theta = model.prior().rvs() model_1 = model.likelihood(theta) model_2 = model.joint().condition(i, theta) - assert_allclose(model_1.mean, model_2.mean) assert_allclose( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol + ) + assert_allclose( + np.broadcast_to( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov.shape, + ), + model_2.cov, + atol=atol, ) i = np.arange(d + n)[:d] model_1 = model.prior() model_2 = model.joint().marginalise(i) - assert_allclose(model_1.mean, model_2.mean) assert_allclose( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol + ) + assert_allclose( + np.broadcast_to( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov.shape, + ), + model_2.cov, + atol=atol, ) D = model.evidence().rvs() model_1 = model.posterior(D) model_2 = model.joint().condition(i, D) - assert_allclose(model_1.mean, model_2.mean) assert_allclose( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol + ) + assert_allclose( + np.broadcast_to( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov.shape, + ), + model_2.cov, + atol=atol, ) def test_bayes_theorem( @@ -623,32 +652,66 
@@ def test_marginal_conditional( d, ) + atol = 1e-5 + i = np.arange(d + n)[-n:] model_1 = model.evidence() model_2 = model.joint().marginalise(i) - assert_allclose(model_1.mean, model_2.mean) - assert_allclose(model_1.cov, model_2.cov) + assert_allclose( + np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol + ) + assert_allclose( + np.broadcast_to( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov.shape, + ), + model_2.cov, + atol=atol, + ) theta = model.prior().rvs() model_1 = model.likelihood(theta) model_2 = model.joint().condition(i, theta) - assert_allclose(model_1.mean, model_2.mean) - assert_allclose(model_1.cov, model_2.cov) + assert_allclose( + np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol + ) + assert_allclose( + np.broadcast_to( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov.shape, + ), + model_2.cov, + atol=atol, + ) i = np.arange(d + n)[:d] model_1 = model.prior() model_2 = model.joint().marginalise(i) - assert_allclose(model_1.mean, model_2.mean) assert_allclose( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol + ) + assert_allclose( + np.broadcast_to( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov.shape, + ), + model_2.cov, + atol=atol, ) D = model.evidence().rvs() model_1 = model.posterior(D) model_2 = model.joint().condition(i, D) - assert_allclose(model_1.mean, model_2.mean) assert_allclose( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov + np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol + ) + assert_allclose( + np.broadcast_to( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov.shape, + ), + model_2.cov, + atol=atol, ) def test_bayes_theorem( @@ -683,6 +746,8 @@ def test_bayes_theorem( theta = model.prior().rvs() D = model.evidence().rvs() assert_allclose( - model.posterior(D).logpdf(theta) + model.evidence().logpdf(D), - model.likelihood(theta).logpdf(D) + model.prior().logpdf(theta), + model.posterior(D).logpdf(theta, broadcast=True) + + model.evidence().logpdf(D, broadcast=True), + model.likelihood(theta).logpdf(D, broadcast=True) + + model.prior().logpdf(theta, broadcast=True), ) From 24f2ad513af412bb8415b9094042407b4683ce6a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 17:47:08 +0000 Subject: [PATCH 049/117] Test suite now passing --- lsbi/model_1.py | 6 +- lsbi/stats_1.py | 5 +- lsbi/utils.py | 7 +- tests/test_model_1.py | 169 ++++++---- tests/test_stats.py | 750 +++++++++++++++++++++--------------------- 5 files changed, 489 insertions(+), 448 deletions(-) diff --git a/lsbi/model_1.py b/lsbi/model_1.py index 1567f1d..c39b624 100644 --- a/lsbi/model_1.py +++ b/lsbi/model_1.py @@ -334,8 +334,7 @@ def likelihood(self, theta): ---------- theta : array_like, shape (n,) """ - theta = np.array(theta)[..., None, :] - dist = super().likelihood(theta) + dist = super().likelihood(np.expand_dims(theta, -2)) dist.__class__ = mixture_normal dist.logA = self.prior().logpdf(theta, broadcast=True, joint=True) return dist @@ -363,8 +362,7 @@ def posterior(self, D): ---------- D : array_like, shape (d,) """ - D = np.array(D)[..., None, :] - dist = super().posterior(D) + dist = super().posterior(np.expand_dims(D, -2)) dist.__class__ = mixture_normal dist.logA = self.evidence().logpdf(D, broadcast=True, joint=True) return 
dist diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py index fd50d91..71ec517 100644 --- a/lsbi/stats_1.py +++ b/lsbi/stats_1.py @@ -390,6 +390,8 @@ def logpdf(self, x, broadcast=False, joint=False): logpdf : array_like, shape (*size, *shape[:-1]) Log of the probability density function evaluated at x. """ + if broadcast: + x = np.expand_dims(x, -2) logpdf = super().logpdf(x, broadcast=broadcast) if self.shape == (): return logpdf @@ -449,8 +451,7 @@ def condition(self, indices, values): ------- conditioned distribution, shape (*shape, len(indices)) """ - values = np.array(values)[..., None, :] - dist = super().condition(indices, values) + dist = super().condition(indices, np.expand_dims(values, -2)) dist.__class__ = mixture_normal marg = self.marginalise(self._bar(indices)) dist.logA = marg.logpdf(values, broadcast=True, joint=True) diff --git a/lsbi/utils.py b/lsbi/utils.py index 514149d..082d1ca 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -2,9 +2,12 @@ import numpy as np -def logdet(A): +def logdet(A, diag=False): """log(abs(det(A))).""" - return np.linalg.slogdet(A)[1] + if diag: + return np.sum(np.log(np.abs(A)), axis=-1) + else: + return np.linalg.slogdet(A)[1] def quantise(f, x, tol=1e-8): diff --git a/tests/test_model_1.py b/tests/test_model_1.py index a61214d..d3b937a 100644 --- a/tests/test_model_1.py +++ b/tests/test_model_1.py @@ -2,7 +2,21 @@ import pytest from numpy.testing import assert_allclose -from lsbi.model_1 import LinearModel, MixtureModel, _de_diagonalise + +def assert_allclose_broadcast(a, b, *args, **kwargs): + shape = np.broadcast_shapes(np.shape(a), np.shape(b)) + return assert_allclose( + np.broadcast_to(a, shape), np.broadcast_to(b, shape), *args, **kwargs + ) + + +from lsbi.model_1 import ( + LinearModel, + MixtureModel, + ReducedLinearModel, + ReducedLinearModelUniformPrior, + _de_diagonalise, +) shapes = [(2, 3), (3,), ()] dims = [1, 2, 4] @@ -307,14 +321,9 @@ def test_marginal_conditional( i = np.arange(d + n)[-n:] model_1 = model.evidence() model_2 = model.joint().marginalise(i) - assert_allclose( - np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol - ) - assert_allclose( - np.broadcast_to( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov.shape, - ), + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov, atol=atol, ) @@ -322,14 +331,9 @@ def test_marginal_conditional( theta = model.prior().rvs() model_1 = model.likelihood(theta) model_2 = model.joint().condition(i, theta) - assert_allclose( - np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol - ) - assert_allclose( - np.broadcast_to( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov.shape, - ), + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov, atol=atol, ) @@ -337,14 +341,9 @@ def test_marginal_conditional( i = np.arange(d + n)[:d] model_1 = model.prior() model_2 = model.joint().marginalise(i) - assert_allclose( - np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol - ) - assert_allclose( - np.broadcast_to( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov.shape, - ), + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, 
model_1.diagonal_cov, model_1.dim), model_2.cov, atol=atol, ) @@ -352,14 +351,9 @@ def test_marginal_conditional( D = model.evidence().rvs() model_1 = model.posterior(D) model_2 = model.joint().condition(i, D) - assert_allclose( - np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol - ) - assert_allclose( - np.broadcast_to( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov.shape, - ), + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov, atol=atol, ) @@ -657,14 +651,9 @@ def test_marginal_conditional( i = np.arange(d + n)[-n:] model_1 = model.evidence() model_2 = model.joint().marginalise(i) - assert_allclose( - np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol - ) - assert_allclose( - np.broadcast_to( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov.shape, - ), + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov, atol=atol, ) @@ -672,14 +661,9 @@ def test_marginal_conditional( theta = model.prior().rvs() model_1 = model.likelihood(theta) model_2 = model.joint().condition(i, theta) - assert_allclose( - np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol - ) - assert_allclose( - np.broadcast_to( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov.shape, - ), + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov, atol=atol, ) @@ -687,14 +671,9 @@ def test_marginal_conditional( i = np.arange(d + n)[:d] model_1 = model.prior() model_2 = model.joint().marginalise(i) - assert_allclose( - np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol - ) - assert_allclose( - np.broadcast_to( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov.shape, - ), + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov, atol=atol, ) @@ -702,14 +681,9 @@ def test_marginal_conditional( D = model.evidence().rvs() model_1 = model.posterior(D) model_2 = model.joint().condition(i, D) - assert_allclose( - np.broadcast_to(model_1.mean, model_2.mean.shape), model_2.mean, atol=atol - ) - assert_allclose( - np.broadcast_to( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov.shape, - ), + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), model_2.cov, atol=atol, ) @@ -751,3 +725,68 @@ def test_bayes_theorem( model.likelihood(theta).logpdf(D, broadcast=True) + model.prior().logpdf(theta, broadcast=True), ) + + +@pytest.mark.parametrize("n", np.arange(1, 6)) +class TestReducedLinearModel(object): + def random(self, n): + mu_pi = np.random.randn(n) + Sigma_pi = invwishart(scale=np.eye(n)).rvs() + mu_L = np.random.randn(n) + Sigma_L = invwishart(scale=np.eye(n)).rvs() + logLmax = np.random.randn() + + return ReducedLinearModel( + mu_pi=mu_pi, Sigma_pi=Sigma_pi, logLmax=logLmax, mu_L=mu_L, Sigma_L=Sigma_L + ) + + def test_bayes_theorem(self, n): + model = self.random(n) + theta = model.prior().rvs() + 
assert_allclose( + model.logP(theta) + model.logZ(), model.logL(theta) + model.logpi(theta) + ) + + +@pytest.mark.parametrize("n", np.arange(1, 6)) +class TestReducedLinearModelUniformPrior(object): + def random(self, n): + mu_L = np.random.randn(n) + Sigma_L = invwishart(scale=np.eye(n)).rvs() + logLmax = np.random.randn() + logV = np.random.randn() + + return ReducedLinearModelUniformPrior( + logLmax=logLmax, logV=logV, mu_L=mu_L, Sigma_L=Sigma_L + ) + + def test_model(self, n): + model = self.random(n) + theta = model.posterior().rvs(N) + assert_allclose( + model.logpi(theta) + model.logL(theta), model.logP(theta) + model.logZ() + ) + + logV = 50 + Sigma_pi = np.exp(2 * logV / n) / (2 * np.pi) * np.eye(n) + + reduced_model = ReducedLinearModel( + logLmax=model.logLmax, + mu_L=model.mu_L, + Sigma_L=model.Sigma_L, + Sigma_pi=Sigma_pi, + ) + + model = ReducedLinearModelUniformPrior( + logLmax=model.logLmax, mu_L=model.mu_L, Sigma_L=model.Sigma_L, logV=logV + ) + + assert_allclose(reduced_model.logZ(), model.logZ()) + assert_allclose(reduced_model.DKL(), model.DKL()) + + def test_bayes_theorem(self, n): + model = self.random(n) + theta = model.posterior().rvs() + assert_allclose( + model.logP(theta) + model.logZ(), model.logL(theta) + model.logpi(theta) + ) diff --git a/tests/test_stats.py b/tests/test_stats.py index 05e20ff..0bf83f6 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -1,375 +1,375 @@ -import numpy as np -import pytest -import scipy.special -from numpy.testing import assert_allclose -from scipy.stats import invwishart, kstest - -from lsbi.stats import ( - mixture_multivariate_normal, - multimultivariate_normal, - multivariate_normal, -) - -N = 1000 - - -@pytest.mark.parametrize("d", [1, 2, 5, 10]) -@pytest.mark.parametrize("k", [1, 2, 5, 10]) -class TestMixtureMultivariateNormal(object): - cls = mixture_multivariate_normal - - def random(self, k, d): - means = np.random.randn(k, d) - covs = invwishart(scale=np.eye(d), df=d * 10).rvs(k) - if k == 1: - covs = np.array([covs]) - logA = np.log(scipy.stats.dirichlet(np.ones(k)).rvs())[0] + 10 - return self.cls(means, covs, logA) - - def test_rvs(self, k, d): - dist = self.random(k, d) - logA = dist.logA - logA -= scipy.special.logsumexp(logA) - mvns = [ - scipy.stats.multivariate_normal(dist.means[i], dist.covs[i]) - for i in range(k) - ] - - samples_1, logpdfs_1 = [], [] - for _ in range(N): - i = np.random.choice(k, p=np.exp(logA)) - x = mvns[i].rvs() - samples_1.append(x) - logpdf = scipy.special.logsumexp( - [mvns[j].logpdf(x) + logA[j] for j in range(k)] - ) - assert_allclose(logpdf, dist.logpdf(x)) - logpdfs_1.append(logpdf) - samples_1, logpdfs_1 = np.array(samples_1), np.array(logpdfs_1) - - samples_2 = dist.rvs(N) - logpdfs_2 = dist.logpdf(samples_2) - - for i in range(d): - if d == 1: - p = kstest(samples_1, samples_2).pvalue - else: - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - assert p > 1e-5 - - p = kstest(logpdfs_1, logpdfs_2).pvalue - assert p > 1e-5 - - for shape in [(d,), (3, d), (3, 4, d)]: - x = np.random.rand(*shape) - assert mvns[0].logpdf(x).shape == dist.logpdf(x).shape - - def test_bijector(self, k, d): - dist = self.random(k, d) - - # Test inversion - x = np.random.rand(N, d) - theta = dist.bijector(x) - assert_allclose(dist.bijector(theta, inverse=True), x, atol=1e-6) - - # Test sampling - samples = dist.rvs(N) - for i in range(d): - if d == 1: - p = kstest(np.squeeze(theta), samples).pvalue - else: - p = kstest(theta[:, i], samples[:, i]).pvalue - assert p > 1e-5 - - p = 
kstest(dist.logpdf(samples), dist.logpdf(theta)).pvalue - assert p > 1e-5 - - # Test shapes - x = np.random.rand(d) - theta = dist.bijector(x) - assert theta.shape == x.shape - assert dist.bijector(theta, inverse=True).shape == x.shape - - x = np.random.rand(3, 4, d) - theta = dist.bijector(x) - assert theta.shape == x.shape - assert dist.bijector(theta, inverse=True).shape == x.shape - - @pytest.mark.parametrize("p", np.arange(1, 5)) - def test_marginalise_condition(self, k, d, p): - if d <= p: - pytest.skip("d <= p") - i = np.random.choice(d, p, replace=False) - j = np.array([x for x in range(d) if x not in i]) - dist = self.random(k, d) - mixture_2 = dist.marginalise(i) - assert isinstance(mixture_2, self.cls) - assert mixture_2.means.shape == (k, d - p) - assert mixture_2.covs.shape == (k, d - p, d - p) - assert_allclose(dist.means[:, j], mixture_2.means) - assert_allclose(dist.covs[:, j][:, :, j], mixture_2.covs) - - v = np.random.randn(k, p) - mixture_3 = dist.condition(i, v) - assert isinstance(mixture_3, self.cls) - assert mixture_3.means.shape == (k, d - p) - assert mixture_3.covs.shape == (k, d - p, d - p) - - v = np.random.randn(p) - mixture_3 = dist.condition(i, v) - assert mixture_3.means.shape == (k, d - p) - assert mixture_3.covs.shape == (k, d - p, d - p) - - @pytest.mark.parametrize("q", [1, 2, 5, 10]) - def test_predict(self, q, k, d): - dist = self.random(k, d) - A = np.random.randn(k, q, d) - y = dist.predict(A) - assert isinstance(y, self.cls) - assert y.means.shape == (k, q) - assert y.covs.shape == (k, q, q) - - b = np.random.randn(q) - y = dist.predict(A, b) - assert isinstance(y, self.cls) - assert y.means.shape == ( - k, - q, - ) - assert y.covs.shape == (k, q, q) - - -@pytest.mark.parametrize("d", [1, 2, 5, 10]) -class TestMultivariateNormal(object): - cls = multivariate_normal - - def random(self, d): - mean = np.random.randn(d) - cov = invwishart(scale=np.eye(d), df=d * 10).rvs() - return self.cls(mean, cov) - - def test_rvs(self, d): - dist = self.random(d) - mvn = scipy.stats.multivariate_normal(dist.mean, dist.cov) - - samples_1 = mvn.rvs(N) - logpdfs_1 = mvn.logpdf(samples_1) - assert_allclose(logpdfs_1, dist.logpdf(samples_1)) - samples_2 = dist.rvs(N) - logpdfs_2 = dist.logpdf(samples_2) - - for i in range(d): - if d == 1: - p = kstest(samples_1, samples_2).pvalue - else: - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - assert p > 1e-5 - - p = kstest(logpdfs_1, logpdfs_2).pvalue - assert p > 1e-5 - - for shape in [(), (d,), (3, d), (3, 4, d)]: - x = np.random.rand(*shape) - assert mvn.logpdf(x).shape == dist.logpdf(x).shape - - def test_bijector(self, d): - dist = self.random(d) - # Test inversion - x = np.random.rand(N, d) - theta = dist.bijector(x) - assert_allclose(dist.bijector(theta, inverse=True), x, atol=1e-6) - - # Test sampling - samples = dist.rvs(N) - for i in range(d): - if d == 1: - p = kstest(np.squeeze(theta), samples).pvalue - else: - p = kstest(theta[:, i], samples[:, i]).pvalue - assert p > 1e-5 - - p = kstest(dist.logpdf(samples), dist.logpdf(theta)).pvalue - assert p > 1e-5 - - # Test shapes - x = np.random.rand(d) - theta = dist.bijector(x) - assert theta.shape == x.shape - assert dist.bijector(theta, inverse=True).shape == x.shape - - x = np.random.rand(3, 4, d) - theta = dist.bijector(x) - assert theta.shape == x.shape - assert dist.bijector(theta, inverse=True).shape == x.shape - - @pytest.mark.parametrize("p", np.arange(1, 5)) - def test_marginalise_condition_multivariate_normal(self, d, p): - if d <= p: - pytest.skip("d <= 
p") - i = np.random.choice(d, p, replace=False) - j = np.array([x for x in range(d) if x not in i]) - dist_1 = self.random(d) - dist_2 = dist_1.marginalise(i) - assert isinstance(dist_2, self.cls) - assert dist_2.mean.shape == (d - p,) - assert dist_2.cov.shape == (d - p, d - p) - assert_allclose(dist_1.mean[j], dist_2.mean) - assert_allclose(dist_1.cov[j][:, j], dist_2.cov) - - v = np.random.randn(p) - dist_3 = dist_1.condition(i, v) - assert isinstance(dist_3, self.cls) - assert dist_3.mean.shape == (d - p,) - assert dist_3.cov.shape == (d - p, d - p) - - @pytest.mark.parametrize("q", [1, 2, 5, 10]) - def test_predict(self, q, d): - dist = self.random(d) - A = np.random.randn(q, d) - y = dist.predict(A) - assert isinstance(y, self.cls) - assert y.mean.shape == (q,) - assert y.cov.shape == (q, q) - - b = np.random.randn(q) - y = dist.predict(A, b) - assert isinstance(y, self.cls) - assert y.mean.shape == (q,) - assert y.cov.shape == (q, q) - - -@pytest.mark.parametrize("d", [1, 2, 5, 10]) -@pytest.mark.parametrize("k", [1, 2, 5, 10]) -class TestMultiMultivariateNormal(object): - cls = multimultivariate_normal - - def random(self, k, d): - means = np.random.randn(k, d) - covs = invwishart(scale=np.eye(d), df=d * 10).rvs(k) - if k == 1: - covs = np.array([covs]) - return self.cls(means, covs) - - def test_rvs(self, k, d): - dist = self.random(k, d) - mvns = [ - scipy.stats.multivariate_normal(dist.means[i], dist.covs[i]) - for i in range(k) - ] - - samples_1, logpdfs_1 = [], [] - for _ in range(N): - xs = [mvn.rvs() for mvn in mvns] - samples_1.append(xs) - logpdf = [mvn.logpdf(x) for x, mvn in zip(xs, mvns)] - assert_allclose(logpdf, dist.logpdf(xs)) - logpdfs_1.append(logpdf) - samples_1, logpdfs_1 = np.array(samples_1), np.array(logpdfs_1) - - samples_2 = dist.rvs(N) - if d == 1: - samples_2 = samples_2[..., None] - logpdfs_2 = dist.logpdf(samples_2) - - for j in range(k): - for i in range(d): - if k == 1 and d == 1: - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - elif k == 1: - p = kstest(samples_1[:, j, i], samples_2[:, i]).pvalue - elif d == 1: - p = kstest(samples_1[:, j], samples_2[:, j, i]).pvalue - else: - p = kstest(samples_1[:, j, i], samples_2[:, j, i]).pvalue - assert p > 1e-5 - - if k == 1: - p = kstest(logpdfs_1[j], logpdfs_2).pvalue - else: - p = kstest(logpdfs_1[j], logpdfs_2[j]).pvalue - assert p > 1e-5 - - for shape in [(k, d), (3, k, d), (3, 4, k, d)]: - xs = np.random.rand(*shape) - logpdfs_1 = [mvn.logpdf(xs[..., i, :]) for i, mvn in enumerate(mvns)] - logpdfs_2 = dist.logpdf(xs) - if k == 1: - logpdfs_2 = np.array(logpdfs_2)[..., None] - for j in range(k): - assert np.shape(logpdfs_1[j]) == logpdfs_2[..., j].shape - - def test_bijector(self, k, d): - dist = self.random(k, d) - - # Test inversion - xs = np.random.rand(N, k, d) - theta = dist.bijector(xs) - assert_allclose(dist.bijector(theta, inverse=True), xs, atol=1e-6) - - # Test sampling - samples = dist.rvs(N) - if d == 1: - samples = samples[..., None] - logpdf_1 = dist.logpdf(samples) - logpdf_2 = dist.logpdf(theta) - for j in range(k): - for i in range(d): - if k == 1: - p = kstest(theta[:, j, i], samples[:, i]).pvalue - else: - p = kstest(theta[:, j, i], samples[:, j, i]).pvalue - assert p > 1e-5 - if k == 1: - p = kstest(logpdf_1, logpdf_2).pvalue - else: - p = kstest(logpdf_1[j], logpdf_2[j]).pvalue - assert p > 1e-5 - - # Test shapes - xs = np.random.rand(k, d) - theta = dist.bijector(xs) - assert theta.shape == xs.shape - assert dist.bijector(theta, inverse=True).shape == xs.shape - - xs = 
np.random.rand(3, 4, k, d) - theta = dist.bijector(xs) - assert theta.shape == xs.shape - assert dist.bijector(theta, inverse=True).shape == xs.shape - - @pytest.mark.parametrize("p", np.arange(1, 5)) - def test_marginalise_condition(self, k, d, p): - if d <= p: - pytest.skip("d <= p") - i = np.random.choice(d, p, replace=False) - j = np.array([x for x in range(d) if x not in i]) - dist = self.random(k, d) - mixture_2 = dist.marginalise(i) - assert isinstance(mixture_2, self.cls) - assert mixture_2.means.shape == (k, d - p) - assert mixture_2.covs.shape == (k, d - p, d - p) - assert_allclose(dist.means[:, j], mixture_2.means) - assert_allclose(dist.covs[:, j][:, :, j], mixture_2.covs) - - v = np.random.randn(k, p) - mixture_3 = dist.condition(i, v) - assert isinstance(mixture_3, self.cls) - assert mixture_3.means.shape == (k, d - p) - assert mixture_3.covs.shape == (k, d - p, d - p) - - @pytest.mark.parametrize("q", [1, 2, 5, 10]) - def test_predict(self, q, k, d): - dist = self.random(k, d) - A = np.random.randn(k, q, d) - y = dist.predict(A) - assert isinstance(y, self.cls) - assert y.means.shape == (k, q) - assert y.covs.shape == (k, q, q) - - b = np.random.randn(q) - y = dist.predict(A, b) - assert isinstance(y, self.cls) - assert y.means.shape == ( - k, - q, - ) - assert y.covs.shape == (k, q, q) +# import numpy as np +# import pytest +# import scipy.special +# from numpy.testing import assert_allclose +# from scipy.stats import invwishart, kstest +# +# from lsbi.stats import ( +# mixture_multivariate_normal, +# multimultivariate_normal, +# multivariate_normal, +# ) +# +# N = 1000 +# +# +# @pytest.mark.parametrize("d", [1, 2, 5, 10]) +# @pytest.mark.parametrize("k", [1, 2, 5, 10]) +# class TestMixtureMultivariateNormal(object): +# cls = mixture_multivariate_normal +# +# def random(self, k, d): +# means = np.random.randn(k, d) +# covs = invwishart(scale=np.eye(d), df=d * 10).rvs(k) +# if k == 1: +# covs = np.array([covs]) +# logA = np.log(scipy.stats.dirichlet(np.ones(k)).rvs())[0] + 10 +# return self.cls(means, covs, logA) +# +# def test_rvs(self, k, d): +# dist = self.random(k, d) +# logA = dist.logA +# logA -= scipy.special.logsumexp(logA) +# mvns = [ +# scipy.stats.multivariate_normal(dist.means[i], dist.covs[i]) +# for i in range(k) +# ] +# +# samples_1, logpdfs_1 = [], [] +# for _ in range(N): +# i = np.random.choice(k, p=np.exp(logA)) +# x = mvns[i].rvs() +# samples_1.append(x) +# logpdf = scipy.special.logsumexp( +# [mvns[j].logpdf(x) + logA[j] for j in range(k)] +# ) +# assert_allclose(logpdf, dist.logpdf(x)) +# logpdfs_1.append(logpdf) +# samples_1, logpdfs_1 = np.array(samples_1), np.array(logpdfs_1) +# +# samples_2 = dist.rvs(N) +# logpdfs_2 = dist.logpdf(samples_2) +# +# for i in range(d): +# if d == 1: +# p = kstest(samples_1, samples_2).pvalue +# else: +# p = kstest(samples_1[:, i], samples_2[:, i]).pvalue +# assert p > 1e-5 +# +# p = kstest(logpdfs_1, logpdfs_2).pvalue +# assert p > 1e-5 +# +# for shape in [(d,), (3, d), (3, 4, d)]: +# x = np.random.rand(*shape) +# assert mvns[0].logpdf(x).shape == dist.logpdf(x).shape +# +# def test_bijector(self, k, d): +# dist = self.random(k, d) +# +# # Test inversion +# x = np.random.rand(N, d) +# theta = dist.bijector(x) +# assert_allclose(dist.bijector(theta, inverse=True), x, atol=1e-6) +# +# # Test sampling +# samples = dist.rvs(N) +# for i in range(d): +# if d == 1: +# p = kstest(np.squeeze(theta), samples).pvalue +# else: +# p = kstest(theta[:, i], samples[:, i]).pvalue +# assert p > 1e-5 +# +# p = 
kstest(dist.logpdf(samples), dist.logpdf(theta)).pvalue +# assert p > 1e-5 +# +# # Test shapes +# x = np.random.rand(d) +# theta = dist.bijector(x) +# assert theta.shape == x.shape +# assert dist.bijector(theta, inverse=True).shape == x.shape +# +# x = np.random.rand(3, 4, d) +# theta = dist.bijector(x) +# assert theta.shape == x.shape +# assert dist.bijector(theta, inverse=True).shape == x.shape +# +# @pytest.mark.parametrize("p", np.arange(1, 5)) +# def test_marginalise_condition(self, k, d, p): +# if d <= p: +# pytest.skip("d <= p") +# i = np.random.choice(d, p, replace=False) +# j = np.array([x for x in range(d) if x not in i]) +# dist = self.random(k, d) +# mixture_2 = dist.marginalise(i) +# assert isinstance(mixture_2, self.cls) +# assert mixture_2.means.shape == (k, d - p) +# assert mixture_2.covs.shape == (k, d - p, d - p) +# assert_allclose(dist.means[:, j], mixture_2.means) +# assert_allclose(dist.covs[:, j][:, :, j], mixture_2.covs) +# +# v = np.random.randn(k, p) +# mixture_3 = dist.condition(i, v) +# assert isinstance(mixture_3, self.cls) +# assert mixture_3.means.shape == (k, d - p) +# assert mixture_3.covs.shape == (k, d - p, d - p) +# +# v = np.random.randn(p) +# mixture_3 = dist.condition(i, v) +# assert mixture_3.means.shape == (k, d - p) +# assert mixture_3.covs.shape == (k, d - p, d - p) +# +# @pytest.mark.parametrize("q", [1, 2, 5, 10]) +# def test_predict(self, q, k, d): +# dist = self.random(k, d) +# A = np.random.randn(k, q, d) +# y = dist.predict(A) +# assert isinstance(y, self.cls) +# assert y.means.shape == (k, q) +# assert y.covs.shape == (k, q, q) +# +# b = np.random.randn(q) +# y = dist.predict(A, b) +# assert isinstance(y, self.cls) +# assert y.means.shape == ( +# k, +# q, +# ) +# assert y.covs.shape == (k, q, q) +# +# +# @pytest.mark.parametrize("d", [1, 2, 5, 10]) +# class TestMultivariateNormal(object): +# cls = multivariate_normal +# +# def random(self, d): +# mean = np.random.randn(d) +# cov = invwishart(scale=np.eye(d), df=d * 10).rvs() +# return self.cls(mean, cov) +# +# def test_rvs(self, d): +# dist = self.random(d) +# mvn = scipy.stats.multivariate_normal(dist.mean, dist.cov) +# +# samples_1 = mvn.rvs(N) +# logpdfs_1 = mvn.logpdf(samples_1) +# assert_allclose(logpdfs_1, dist.logpdf(samples_1)) +# samples_2 = dist.rvs(N) +# logpdfs_2 = dist.logpdf(samples_2) +# +# for i in range(d): +# if d == 1: +# p = kstest(samples_1, samples_2).pvalue +# else: +# p = kstest(samples_1[:, i], samples_2[:, i]).pvalue +# assert p > 1e-5 +# +# p = kstest(logpdfs_1, logpdfs_2).pvalue +# assert p > 1e-5 +# +# for shape in [(), (d,), (3, d), (3, 4, d)]: +# x = np.random.rand(*shape) +# assert mvn.logpdf(x).shape == dist.logpdf(x).shape +# +# def test_bijector(self, d): +# dist = self.random(d) +# # Test inversion +# x = np.random.rand(N, d) +# theta = dist.bijector(x) +# assert_allclose(dist.bijector(theta, inverse=True), x, atol=1e-6) +# +# # Test sampling +# samples = dist.rvs(N) +# for i in range(d): +# if d == 1: +# p = kstest(np.squeeze(theta), samples).pvalue +# else: +# p = kstest(theta[:, i], samples[:, i]).pvalue +# assert p > 1e-5 +# +# p = kstest(dist.logpdf(samples), dist.logpdf(theta)).pvalue +# assert p > 1e-5 +# +# # Test shapes +# x = np.random.rand(d) +# theta = dist.bijector(x) +# assert theta.shape == x.shape +# assert dist.bijector(theta, inverse=True).shape == x.shape +# +# x = np.random.rand(3, 4, d) +# theta = dist.bijector(x) +# assert theta.shape == x.shape +# assert dist.bijector(theta, inverse=True).shape == x.shape +# +# 
@pytest.mark.parametrize("p", np.arange(1, 5)) +# def test_marginalise_condition_multivariate_normal(self, d, p): +# if d <= p: +# pytest.skip("d <= p") +# i = np.random.choice(d, p, replace=False) +# j = np.array([x for x in range(d) if x not in i]) +# dist_1 = self.random(d) +# dist_2 = dist_1.marginalise(i) +# assert isinstance(dist_2, self.cls) +# assert dist_2.mean.shape == (d - p,) +# assert dist_2.cov.shape == (d - p, d - p) +# assert_allclose(dist_1.mean[j], dist_2.mean) +# assert_allclose(dist_1.cov[j][:, j], dist_2.cov) +# +# v = np.random.randn(p) +# dist_3 = dist_1.condition(i, v) +# assert isinstance(dist_3, self.cls) +# assert dist_3.mean.shape == (d - p,) +# assert dist_3.cov.shape == (d - p, d - p) +# +# @pytest.mark.parametrize("q", [1, 2, 5, 10]) +# def test_predict(self, q, d): +# dist = self.random(d) +# A = np.random.randn(q, d) +# y = dist.predict(A) +# assert isinstance(y, self.cls) +# assert y.mean.shape == (q,) +# assert y.cov.shape == (q, q) +# +# b = np.random.randn(q) +# y = dist.predict(A, b) +# assert isinstance(y, self.cls) +# assert y.mean.shape == (q,) +# assert y.cov.shape == (q, q) +# +# +# @pytest.mark.parametrize("d", [1, 2, 5, 10]) +# @pytest.mark.parametrize("k", [1, 2, 5, 10]) +# class TestMultiMultivariateNormal(object): +# cls = multimultivariate_normal +# +# def random(self, k, d): +# means = np.random.randn(k, d) +# covs = invwishart(scale=np.eye(d), df=d * 10).rvs(k) +# if k == 1: +# covs = np.array([covs]) +# return self.cls(means, covs) +# +# def test_rvs(self, k, d): +# dist = self.random(k, d) +# mvns = [ +# scipy.stats.multivariate_normal(dist.means[i], dist.covs[i]) +# for i in range(k) +# ] +# +# samples_1, logpdfs_1 = [], [] +# for _ in range(N): +# xs = [mvn.rvs() for mvn in mvns] +# samples_1.append(xs) +# logpdf = [mvn.logpdf(x) for x, mvn in zip(xs, mvns)] +# assert_allclose(logpdf, dist.logpdf(xs)) +# logpdfs_1.append(logpdf) +# samples_1, logpdfs_1 = np.array(samples_1), np.array(logpdfs_1) +# +# samples_2 = dist.rvs(N) +# if d == 1: +# samples_2 = samples_2[..., None] +# logpdfs_2 = dist.logpdf(samples_2) +# +# for j in range(k): +# for i in range(d): +# if k == 1 and d == 1: +# p = kstest(samples_1[:, i], samples_2[:, i]).pvalue +# elif k == 1: +# p = kstest(samples_1[:, j, i], samples_2[:, i]).pvalue +# elif d == 1: +# p = kstest(samples_1[:, j], samples_2[:, j, i]).pvalue +# else: +# p = kstest(samples_1[:, j, i], samples_2[:, j, i]).pvalue +# assert p > 1e-5 +# +# if k == 1: +# p = kstest(logpdfs_1[j], logpdfs_2).pvalue +# else: +# p = kstest(logpdfs_1[j], logpdfs_2[j]).pvalue +# assert p > 1e-5 +# +# for shape in [(k, d), (3, k, d), (3, 4, k, d)]: +# xs = np.random.rand(*shape) +# logpdfs_1 = [mvn.logpdf(xs[..., i, :]) for i, mvn in enumerate(mvns)] +# logpdfs_2 = dist.logpdf(xs) +# if k == 1: +# logpdfs_2 = np.array(logpdfs_2)[..., None] +# for j in range(k): +# assert np.shape(logpdfs_1[j]) == logpdfs_2[..., j].shape +# +# def test_bijector(self, k, d): +# dist = self.random(k, d) +# +# # Test inversion +# xs = np.random.rand(N, k, d) +# theta = dist.bijector(xs) +# assert_allclose(dist.bijector(theta, inverse=True), xs, atol=1e-6) +# +# # Test sampling +# samples = dist.rvs(N) +# if d == 1: +# samples = samples[..., None] +# logpdf_1 = dist.logpdf(samples) +# logpdf_2 = dist.logpdf(theta) +# for j in range(k): +# for i in range(d): +# if k == 1: +# p = kstest(theta[:, j, i], samples[:, i]).pvalue +# else: +# p = kstest(theta[:, j, i], samples[:, j, i]).pvalue +# assert p > 1e-5 +# if k == 1: +# p = kstest(logpdf_1, 
logpdf_2).pvalue +# else: +# p = kstest(logpdf_1[j], logpdf_2[j]).pvalue +# assert p > 1e-5 +# +# # Test shapes +# xs = np.random.rand(k, d) +# theta = dist.bijector(xs) +# assert theta.shape == xs.shape +# assert dist.bijector(theta, inverse=True).shape == xs.shape +# +# xs = np.random.rand(3, 4, k, d) +# theta = dist.bijector(xs) +# assert theta.shape == xs.shape +# assert dist.bijector(theta, inverse=True).shape == xs.shape +# +# @pytest.mark.parametrize("p", np.arange(1, 5)) +# def test_marginalise_condition(self, k, d, p): +# if d <= p: +# pytest.skip("d <= p") +# i = np.random.choice(d, p, replace=False) +# j = np.array([x for x in range(d) if x not in i]) +# dist = self.random(k, d) +# mixture_2 = dist.marginalise(i) +# assert isinstance(mixture_2, self.cls) +# assert mixture_2.means.shape == (k, d - p) +# assert mixture_2.covs.shape == (k, d - p, d - p) +# assert_allclose(dist.means[:, j], mixture_2.means) +# assert_allclose(dist.covs[:, j][:, :, j], mixture_2.covs) +# +# v = np.random.randn(k, p) +# mixture_3 = dist.condition(i, v) +# assert isinstance(mixture_3, self.cls) +# assert mixture_3.means.shape == (k, d - p) +# assert mixture_3.covs.shape == (k, d - p, d - p) +# +# @pytest.mark.parametrize("q", [1, 2, 5, 10]) +# def test_predict(self, q, k, d): +# dist = self.random(k, d) +# A = np.random.randn(k, q, d) +# y = dist.predict(A) +# assert isinstance(y, self.cls) +# assert y.means.shape == (k, q) +# assert y.covs.shape == (k, q, q) +# +# b = np.random.randn(q) +# y = dist.predict(A, b) +# assert isinstance(y, self.cls) +# assert y.means.shape == ( +# k, +# q, +# ) +# assert y.covs.shape == (k, q, q) From 55e9bd099c91d48dafb99dc56f6980ff51b4d990 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 17:48:21 +0000 Subject: [PATCH 050/117] Executed renaming --- lsbi/model.py | 905 ++++++++-------------- lsbi/model_1.py | 544 ------------- lsbi/stats.py | 622 ++++++++------- lsbi/stats_1.py | 524 ------------- tests/test_model.py | 1696 +++++++++++++++++------------------------ tests/test_model_1.py | 792 ------------------- tests/test_stats.py | 874 ++++++++++++--------- tests/test_stats_1.py | 499 ------------ 8 files changed, 1826 insertions(+), 4630 deletions(-) delete mode 100644 lsbi/model_1.py delete mode 100644 lsbi/stats_1.py delete mode 100644 tests/test_model_1.py delete mode 100644 tests/test_stats_1.py diff --git a/lsbi/model.py b/lsbi/model.py index ce85db5..c39b624 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -2,235 +2,394 @@ import numpy as np from numpy.linalg import inv, solve -from lsbi.stats import ( - mixture_multivariate_normal, - multimultivariate_normal, - multivariate_normal, -) -from lsbi.utils import logdet +from lsbi.stats_1 import mixture_normal, multivariate_normal +from lsbi.utils import logdet, matrix + + +def _de_diagonalise(x, diagonal, *args): + if diagonal: + return np.atleast_1d(x)[..., None, :] * np.eye(*args) + else: + return x class LinearModel(object): - """A linear model. + """A multilinear model. D|theta ~ N( m + M theta, C ) theta ~ N( mu, Sigma ) Defined by: - Parameters: theta (n,) - Data: D (d,) - Prior mean: mu (n,) - Prior covariance: Sigma (n, n) - Data mean: m (d,) - Data covariance: C (d, d) + Parameters: theta (..., n,) + Data: D (..., d,) + Model: M (..., d, n) + Prior mean: mu (..., n,) + Prior covariance: Sigma (..., n, n) + Data mean: m (..., d,) + Data covariance: C (..., d, d) + + where the ellipses indicate arbitrary (broadcastable) additional copies. 
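For instance, under this broadcasting convention a batch of three d=2, n=4 models with shared scalar data and prior settings could be constructed as follows (an illustrative sketch; the sizes and the random model matrix are assumptions, not values from the patch):

    import numpy as np
    from lsbi.model import LinearModel

    # three broadcast copies of a d=2, n=4 linear model with scalar C and Sigma
    model = LinearModel(M=np.random.randn(3, 2, 4), C=0.1, mu=0, Sigma=1)
    # model.shape == (3,), model.n == 4, model.d == 2
    theta = model.prior().rvs()        # expected shape (3, 4)
    D = model.likelihood(theta).rvs()  # expected shape (3, 2)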
Parameters ---------- M : array_like, optional - if matrix: model matrix - if vector: diagonal matrix with vector on diagonal - if scalar: scalar * rectangular identity matrix + if ndim>=2: model matrices + if ndim==1: model matrix with vector diagonal for all components + if ndim==0: scalar * rectangular identity matrix for all components Defaults to rectangular identity matrix m : array_like, optional - if vector: data mean - if scalar: scalar * unit vector - Defaults to zero vector + if ndim>=1: data means + if ndim==0: scalar * unit vector for all components + Defaults to 0 for all components C : array_like, optional - if matrix: data covariance - if vector: diagonal matrix with vector on diagonal - if scalar: scalar * identity matrix - Defaults to identity matrix + if ndim>=2: data covariances + if ndim==1: data covariance with vector diagonal for all components + if ndim==0: scalar * identity matrix for all components + Defaults to rectangular identity matrix mu : array_like, optional - if vector: prior mean - if scalar: scalar * unit vector - Defaults to zero vector + if ndim>=1: prior means + if ndim==0: scalar * unit vector for all components + Defaults to 0 for all components + Prior mean, defaults to zero vector Sigma : array_like, optional - if matrix: prior covariance - if vector: diagonal matrix with vector on diagonal - if scalar: scalar * identity matrix - Defaults to identity matrix + if ndim>=2: prior covariances + if ndim==1: prior covariance with vector diagonal for all components + if ndim==0: scalar * identity matrix for all components + Defaults to k copies of identity matrices n : int, optional - Number of parameters - Defaults to automatically inferred value + Number of parameters, defaults to automatically inferred value d : int, optional - Number of data dimensions - Defaults to automatically inferred value + Number of data dimensions, defaults to automatically inferred value + shape : (), optional + Number of mixture components, defaults to automatically inferred value """ - def __init__(self, *args, **kwargs): - # Rationalise input arguments - M = self._atleast_2d(kwargs.pop("M", None)) - m = self._atleast_1d(kwargs.pop("m", None)) - C = self._atleast_2d(kwargs.pop("C", None)) - mu = self._atleast_1d(kwargs.pop("mu", None)) - Sigma = self._atleast_2d(kwargs.pop("Sigma", None)) - n = kwargs.pop("n", 0) - d = kwargs.pop("d", 0) - - # Determine dimensions - n = max([n, M.shape[1], mu.shape[0], Sigma.shape[0], Sigma.shape[1]]) - d = max([d, M.shape[0], m.shape[0], C.shape[0], C.shape[1]]) - if not n: - raise ValueError("Unable to determine number of parameters n") - if not d: - raise ValueError("Unable to determine data dimensions d") - - # Set defaults if no argument was passed - M = M if M.size else np.eye(d, n) - m = m if m.size else np.zeros(d) - C = C if C.size else np.eye(d) - mu = mu if mu.size else np.zeros(n) - Sigma = Sigma if Sigma.size else np.eye(n) - - # Broadcast to correct shape - self.M = self._broadcast_to(M, (d, n)) - self.m = np.broadcast_to(m, (d,)) - self.C = self._broadcast_to(C, (d, d)) - self.mu = np.broadcast_to(mu, (n,)) - self.Sigma = self._broadcast_to(Sigma, (n, n)) - - @classmethod - def from_joint(cls, mean, cov, n): - """Construct model from joint distribution.""" - mean = np.atleast_1d(mean) - cov = np.atleast_2d(cov) - mu = mean[-n:] - Sigma = cov[-n:, -n:] - M = solve(Sigma, cov[-n:, :-n]).T - m = mean[:-n] - M @ mu - C = cov[:-n, :-n] - M @ Sigma @ M.T - - return cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma) + def __init__( + self, + 
M=1, + m=0, + C=1, + mu=0, + Sigma=1, + shape=(), + n=1, + d=1, + diagonal_M=False, + diagonal_C=False, + diagonal_Sigma=False, + ): + self.M = M + self.diagonal_M = diagonal_M + if len(np.shape(self.M)) < 2: + self.diagonal_M = True + self.m = m + self.C = C + self.diagonal_C = diagonal_C + if len(np.shape(self.C)) < 2: + self.diagonal_C = True + self.mu = mu + self.Sigma = Sigma + self.diagonal_Sigma = diagonal_Sigma + if len(np.shape(self.Sigma)) < 2: + self.diagonal_Sigma = True + self._shape = shape + self._n = n + self._d = d + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + np.shape(self.M)[: -2 + self.diagonal_M], + np.shape(self.m)[:-1], + np.shape(self.C)[: -2 + self.diagonal_C], + np.shape(self.mu)[:-1], + np.shape(self.Sigma)[: -2 + self.diagonal_Sigma], + self._shape, + ) @property def n(self): - """Dimensionality of parameter space len(theta).""" - return self.M.shape[1] + """Dimension of the distribution.""" + return np.max( + [ + *np.shape(self.M)[len(np.shape(self.M)) - 1 + self.diagonal_M :], + *np.shape(self.Sigma)[-2 + self.diagonal_Sigma :], + *np.shape(self.mu)[-1:], + self._n, + ] + ) @property def d(self): """Dimensionality of data space len(D).""" - return self.M.shape[0] + return np.max( + [ + *np.shape(self.M)[-2 + self.diagonal_M : -1], + *np.shape(self.C)[-2 + self.diagonal_C :], + *np.shape(self.m)[-1:], + self._d, + ] + ) def likelihood(self, theta): """P(D|theta) as a scipy distribution object. - D ~ N( m + M theta, C ) + D|theta ~ N( m + M theta, C ) + theta ~ N( mu, Sigma ) Parameters ---------- - theta : array_like, shape (n,) + theta : array_like, shape (k, n) """ - theta = np.atleast_1d(theta) - return multivariate_normal(self.m + self.M @ theta, self.C) + mu = self.m + np.einsum("...ja,...a->...j", self._M, theta) + return multivariate_normal(mu, self.C, self.shape, self.d, self.diagonal_C) def prior(self): """P(theta) as a scipy distribution object. theta ~ N( mu, Sigma ) """ - return multivariate_normal(self.mu, self.Sigma) + return multivariate_normal( + self.mu, self.Sigma, self.shape, self.n, self.diagonal_Sigma + ) def posterior(self, D): """P(theta|D) as a scipy distribution object. 
- theta ~ N( mu + Sigma M'C^{-1}(D-m), Sigma - Sigma M' C^{-1} M Sigma ) + theta|D ~ N( mu + S M'C^{-1}(D - m - M mu), S ) + S = (Sigma^{-1} + M'C^{-1}M)^{-1} Parameters ---------- D : array_like, shape (d,) """ - D = np.atleast_1d(D) - Sigma = inv(inv(self.Sigma) + self.M.T @ inv(self.C) @ self.M) - D0 = self.m + self.M @ self.mu - mu = self.mu + Sigma @ self.M.T @ inv(self.C) @ (D - D0) - return multivariate_normal(mu, Sigma) + values = ( + D + - self.m + - np.einsum("...ja,...a->...j", self._M, self.mu * np.ones(self.n)) + ) + + diagonal_Sigma = self.diagonal_C and self.diagonal_Sigma and self.diagonal_M + + if diagonal_Sigma: + dim = min(self.n, self.d) + shape = np.broadcast_shapes(self.shape, values.shape[:-1]) + C = np.atleast_1d(self.C)[..., :dim] + M = np.atleast_1d(self.M)[..., :dim] + Sigma = np.broadcast_to(self.Sigma, shape + (self.n,)).copy() + Sigma[..., :dim] = 1 / (1 / Sigma[..., :dim] + M**2 / C) + + mu = np.broadcast_to(self.mu, shape + (self.n,)).copy() + mu[..., :dim] = mu[..., :dim] + Sigma[..., :dim] * M / C * values[..., :dim] + else: + if self.diagonal_C: + invC = np.eye(self.d) / np.atleast_1d(self.C)[..., None, :] + else: + invC = inv(self.C) + + if self.diagonal_Sigma: + invSigma = np.eye(self.n) / np.atleast_1d(self.Sigma)[..., None, :] + else: + invSigma = inv(self.Sigma) + + Sigma = inv( + invSigma + np.einsum("...aj,...ab,...bk->...jk", self._M, invC, self._M) + ) + mu = self.mu + np.einsum( + "...ja,...ba,...bc,...c->...j", Sigma, self._M, invC, values + ) + + return multivariate_normal(mu, Sigma, self.shape, self.n, diagonal_Sigma) def evidence(self): """P(D) as a scipy distribution object. D ~ N( m + M mu, C + M Sigma M' ) """ - return multivariate_normal( - self.m + self.M @ self.mu, self.C + self.M @ self.Sigma @ self.M.T - ) + mu = self.m + np.einsum("...ja,...a->...j", self._M, self.mu * np.ones(self.n)) + diagonal_Sigma = self.diagonal_C and self.diagonal_Sigma and self.diagonal_M + + if diagonal_Sigma: + dim = min(self.n, self.d) + M = np.atleast_1d(self.M)[..., :dim] + S = np.atleast_1d(self.Sigma)[..., :dim] + Sigma = np.broadcast_to(self.C, self.shape + (self.d,)).copy() + Sigma[..., :dim] += S * M**2 + else: + Sigma = self._C + np.einsum( + "...ja,...ab,...kb->...jk", self._M, self._Sigma, self._M + ) + + return multivariate_normal(mu, Sigma, self.shape, self.d, diagonal_Sigma) def joint(self): """P(D, theta) as a scipy distribution object. 
- [ D ] ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) - [theta] ( [ mu ] , [ Sigma M' Sigma ] ) + [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) + [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) """ evidence = self.evidence() prior = self.prior() - mu = np.concatenate([evidence.mean, prior.mean]) - Sigma = np.block( - [[evidence.cov, self.M @ self.Sigma], [self.Sigma @ self.M.T, prior.cov]] - ) - return multivariate_normal(mu, Sigma) + a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) + b = np.broadcast_to(prior.mean, self.shape + (self.n,)) + mu = np.block([a, b]) + A = _de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) + A = np.broadcast_to(A, self.shape + (self.d, self.d)) + D = _de_diagonalise(prior.cov, prior.diagonal_cov, self.n) + D = np.broadcast_to(D, self.shape + (self.n, self.n)) + B = np.einsum("...ja,...al->...jl", self._M, self._Sigma) + B = np.broadcast_to(B, self.shape + (self.d, self.n)) + C = np.moveaxis(B, -1, -2) + Sigma = np.block([[A, B], [C, D]]) + return multivariate_normal(mu, Sigma, self.shape, self.n + self.d) + + @property + def _M(self): + return _de_diagonalise(self.M, self.diagonal_M, self.d, self.n) + + @property + def _C(self): + return _de_diagonalise(self.C, self.diagonal_C, self.d) + + @property + def _Sigma(self): + return _de_diagonalise(self.Sigma, self.diagonal_Sigma, self.n) + + +class MixtureModel(LinearModel): + """A linear mixture model. + + D|theta, A ~ N( m + M theta, C ) + theta|A ~ N( mu, Sigma ) + A ~ categorical( exp(logA) ) + + Defined by: + Parameters: theta (..., n,) + Data: D (..., d,) + Prior means: mu (..., k, n) + Prior covariances: Sigma (..., k, n, n) + Data means: m (..., k, d) + Data covariances: C (..., k, d, d) + log mixture weights: logA (..., k,) + + Parameters + ---------- + M : array_like, optional + if ndim>=2: model matrices + if ndim==1: model matrix with vector diagonal for all components + if scalar: scalar * rectangular identity matrix for all components + Defaults to k copies of rectangular identity matrices + m : array_like, optional + if ndim>=1: data means + if scalar: scalar * unit vector for all components + Defaults to 0 for all components + C : array_like, optional + if ndim>=2: data covariances + if ndim==1: data covariance with vector diagonal for all components + if scalar: scalar * identity matrix for all components + Defaults to k copies of identity matrices + mu : array_like, optional + if ndim>=1: prior means + if scalar: scalar * unit vector for all components + Defaults to 0 for all components + Prior mean, defaults to zero vector + Sigma : array_like, optional + if ndim>=2: prior covariances + if ndim==1: prior covariance with vector diagonal for all components + if scalar: scalar * identity matrix for all components + Defaults to k copies of identity matrices + logA : array_like, optional + if ndim>=1: log mixture weights + if scalar: scalar * unit vector + Defaults to uniform weights + n : int, optional + Number of parameters, defaults to automatically inferred value + d : int, optional + Number of data dimensions, defaults to automatically inferred value + """ + + def __init__(self, logA=1, *args): + super().__init__(*args) + self.logA = logA + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes(np.shape(self.logA), super().shape) - def DKL(self, D): - """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence. 
+ @property + def k(self): + """Number of mixture components of the distribution.""" + return self.shape[-1] + + def likelihood(self, theta): + """P(D|theta) as a scipy distribution object. + + D|theta,A ~ N( m + M theta, C ) + theta|A ~ N( mu, Sigma ) + A ~ categorical(exp(logA)) Parameters ---------- - D : array_like, shape (d,) + theta : array_like, shape (n,) """ - cov_p = self.posterior(D).cov - cov_q = self.prior().cov - mu_p = self.posterior(D).mean - mu_q = self.prior().mean - return ( - -logdet(cov_p) - + logdet(cov_q) - + np.trace(inv(cov_q) @ cov_p - 1) - + (mu_q - mu_p) @ inv(cov_q) @ (mu_q - mu_p) - ) / 2 + dist = super().likelihood(np.expand_dims(theta, -2)) + dist.__class__ = mixture_normal + dist.logA = self.prior().logpdf(theta, broadcast=True, joint=True) + return dist + + def prior(self): + """P(theta) as a scipy distribution object. - def reduce(self, D): - """Reduce the model to a Gaussian in the parameters. + theta|A ~ N( mu, Sigma ) + A ~ categorical(exp(logA)) + """ + dist = super().prior() + dist.__class__ = mixture_normal + dist.logA = self.logA + return dist + + def posterior(self, D): + """P(theta|D) as a scipy distribution object. + + theta|D, A ~ N( mu + S M'C^{-1}(D - m - M mu), S ) + D|A ~ N( m + M mu, C + M Sigma M' ) + A ~ categorical(exp(logA)) + S = (Sigma^{-1} + M'C^{-1}M)^{-1} Parameters ---------- D : array_like, shape (d,) + """ + dist = super().posterior(np.expand_dims(D, -2)) + dist.__class__ = mixture_normal + dist.logA = self.evidence().logpdf(D, broadcast=True, joint=True) + return dist + + def evidence(self): + """P(D) as a scipy distribution object. - Returns - ------- - ReducedLinearModel + D|A ~ N( m + M mu, C + M Sigma M' ) + A ~ categorical(exp(logA)) """ - Sigma_L = inv(self.M.T @ inv(self.C) @ self.M) - mu_L = Sigma_L @ self.M.T @ inv(self.C) @ (D - self.m) - logLmax = ( - -logdet(2 * np.pi * self.C) / 2 - - (D - self.m) - @ inv(self.C) - @ (self.C - self.M @ Sigma_L @ self.M.T) - @ inv(self.C) - @ (D - self.m) - / 2 - ) - return ReducedLinearModel( - mu_L=mu_L, - Sigma_L=Sigma_L, - logLmax=logLmax, - mu_pi=self.prior().mean, - Sigma_pi=self.prior().cov, - ) + dist = super().evidence() + dist.__class__ = mixture_normal + dist.logA = self.logA + return dist - def _atleast_2d(self, x): - if x is None: - return np.zeros(shape=(0, 0)) - return np.atleast_2d(x) + def joint(self): + """P(D, theta) as a scipy distribution object. - def _atleast_1d(self, x): - if x is None: - return np.zeros(shape=(0,)) - return np.atleast_1d(x) + [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) + [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) - def _broadcast_to(self, x, shape): - if x.shape == shape: - return x - return x * np.eye(*shape) + A ~ categorical(exp(logA)) + """ + dist = super().joint() + dist.__class__ = mixture_normal + dist.logA = self.logA + return dist class ReducedLinearModel(object): @@ -383,449 +542,3 @@ def logZ(self): def DKL(self): """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" return self.logV - logdet(2 * np.pi * np.e * self.Sigma_P) / 2 - - -class LinearMixtureModel(object): - """A linear mixture model. 
- - D|theta, A ~ N( m + M theta, C ) - theta|A ~ N( mu, Sigma ) - A ~ categorical( exp(logA) ) - - Defined by: - Parameters: theta (n,) - Data: D (d,) - Prior means: mu (k, n) - Prior covariances: Sigma (k, n, n) - Data means: m (k, d) - Data covariances: C (k, d, d) - log mixture weights: logA (k,) - - Parameters - ---------- - M : array_like, optional - if ndim==3: model matrices - if ndim==2: model matrix with same matrix for all components - if ndim==1: model matrix with vector diagonal for all components - if scalar: scalar * rectangular identity matrix for all components - Defaults to k copies of rectangular identity matrices - m : array_like, optional - if ndim==2: data means - if ndim==1: data mean with same vector for all components - if scalar: scalar * unit vector for all components - Defaults to 0 for all components - C : array_like, optional - if ndim==3: data covariances - if ndim==2: data covariance with same matrix for all components - if ndim==1: data covariance with vector diagonal for all components - if scalar: scalar * identity matrix for all components - Defaults to k copies of identity matrices - mu : array_like, optional - if ndim==2: prior means - if ndim==1: prior mean with same vector for all components - if scalar: scalar * unit vector for all components - Defaults to 0 for all components - Prior mean, defaults to zero vector - Sigma : array_like, optional - if ndim==3: prior covariances - if ndim==2: prior covariance with same matrix for all components - if ndim==1: prior covariance with vector diagonal for all components - if scalar: scalar * identity matrix for all components - Defaults to k copies of identity matrices - logA : array_like, optional - if ndim==1: log mixture weights - if scalar: scalar * unit vector - Defaults to uniform weights - n : int, optional - Number of parameters, defaults to automatically inferred value - d : int, optional - Number of data dimensions, defaults to automatically inferred value - k : int, optional - Number of mixture components, defaults to automatically inferred value - """ - - def __init__(self, *args, **kwargs): - # Rationalise input arguments - M = self._atleast_3d(kwargs.pop("M", None)) - m = self._atleast_2d(kwargs.pop("m", None)) - C = self._atleast_3d(kwargs.pop("C", None)) - mu = self._atleast_2d(kwargs.pop("mu", None)) - Sigma = self._atleast_3d(kwargs.pop("Sigma", None)) - logA = self._atleast_1d(kwargs.pop("logA", None)) - n = kwargs.pop("n", 0) - d = kwargs.pop("d", 0) - k = kwargs.pop("k", 0) - - # Determine dimensions - n = max([n, M.shape[2], mu.shape[1], Sigma.shape[1], Sigma.shape[2]]) - d = max([d, M.shape[1], m.shape[1], C.shape[1], C.shape[2]]) - k = max( - [ - k, - M.shape[0], - m.shape[0], - C.shape[0], - mu.shape[0], - Sigma.shape[0], - logA.shape[0], - ] - ) - if not n: - raise ValueError("Unable to determine number of parameters n") - if not d: - raise ValueError("Unable to determine data dimensions d") - - # Set defaults if no argument was passed - M = M if M.size else np.eye(d, n) - m = m if m.size else np.zeros(d) - C = C if C.size else np.eye(d) - mu = mu if mu.size else np.zeros(n) - Sigma = Sigma if Sigma.size else np.eye(n) - logA = logA if logA.size else -np.log(k) - - # Broadcast to correct shape - self.M = self._broadcast_to(M, (k, d, n)) - self.m = np.broadcast_to(m, (k, d)) - self.C = self._broadcast_to(C, (k, d, d)) - self.mu = np.broadcast_to(mu, (k, n)) - self.Sigma = self._broadcast_to(Sigma, (k, n, n)) - self.logA = np.broadcast_to(logA, (k,)) - - @classmethod - def 
from_joint(cls, means, covs, logA, n): - """Construct model from joint distribution.""" - mu = means[:, -n:] - Sigma = covs[:, -n:, -n:] - M = solve(Sigma, covs[:, -n:, :-n]).transpose(0, 2, 1) - m = means[:, :-n] - np.einsum("ija,ia->ij", M, mu) - C = covs[:, :-n, :-n] - np.einsum("ija,iab,ikb->ijk", M, Sigma, M) - return cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma, logA=logA) - - @property - def n(self): - """Dimensionality of parameter space len(theta).""" - return self.M.shape[2] - - @property - def d(self): - """Dimensionality of data space len(D).""" - return self.M.shape[1] - - @property - def k(self): - """Number of mixture components len(logA).""" - return self.M.shape[0] - - def likelihood(self, theta): - """P(D|theta) as a scipy distribution object. - - D|theta,A ~ N( m + M theta, C ) - theta|A ~ N( mu, Sigma ) - A ~ categorical(exp(logA)) - - Parameters - ---------- - theta : array_like, shape (n,) - """ - theta = np.atleast_1d(theta) - mu = self.m + np.einsum("ija,a->ij", self.M, theta) - prior = self.prior() - logA = prior.logpdf(theta, reduce=False) + self.logA - prior.logpdf(theta) - return mixture_multivariate_normal(mu, self.C, logA) - - def prior(self): - """P(theta) as a scipy distribution object. - - theta|A ~ N( mu, Sigma ) - A ~ categorical(exp(logA)) - """ - return mixture_multivariate_normal(self.mu, self.Sigma, self.logA) - - def posterior(self, D): - """P(theta|D) as a scipy distribution object. - - theta|D, A ~ N( mu + S M'C^{-1}(D - m - M mu), S ) - D|A ~ N( m + M mu, C + M Sigma M' ) - A ~ categorical(exp(logA)) - S = (Sigma^{-1} + M'C^{-1}M)^{-1} - - Parameters - ---------- - D : array_like, shape (d,) - """ - D = np.atleast_1d(D) - Sigma = inv( - inv(self.Sigma) + np.einsum("iaj,iab,ibk->ijk", self.M, inv(self.C), self.M) - ) - D0 = self.m + np.einsum("ija,ia->ij", self.M, self.mu) - mu = self.mu + np.einsum( - "ija,iba,ibc,ic->ij", Sigma, self.M, inv(self.C), D - D0 - ) - evidence = self.evidence() - logA = evidence.logpdf(D, reduce=False) + self.logA - evidence.logpdf(D) - return mixture_multivariate_normal(mu, Sigma, logA) - - def evidence(self): - """P(D) as a scipy distribution object. - - D|A ~ N( m + M mu, C + M Sigma M' ) - A ~ categorical(exp(logA)) - """ - mu = self.m + np.einsum("ija,ia->ij", self.M, self.mu) - Sigma = self.C + np.einsum("ija,iab,ikb->ijk", self.M, self.Sigma, self.M) - return mixture_multivariate_normal(mu, Sigma, self.logA) - - def joint(self): - """P(D, theta) as a scipy distribution object. - - [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) - [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) - - A ~ categorical(exp(logA)) - """ - evidence = self.evidence() - prior = self.prior() - mu = np.block([evidence.means, prior.means]) - corr = np.einsum("ija,ial->ijl", self.M, self.Sigma) - Sigma = np.block([[evidence.covs, corr], [corr.transpose(0, 2, 1), prior.covs]]) - return mixture_multivariate_normal(mu, Sigma, self.logA) - - def _atleast_3d(self, x): - if x is None: - return np.zeros(shape=(0, 0, 0)) - x = np.array(x) - if x.ndim == 3: - return x - return np.atleast_2d(x)[None, ...] - - def _atleast_2d(self, x): - if x is None: - return np.zeros(shape=(0, 0)) - x = np.array(x) - if x.ndim == 2: - return x - return np.atleast_1d(x)[None, ...] 
- - def _atleast_1d(self, x): - if x is None: - return np.zeros(shape=(0,)) - return np.atleast_1d(x) - - def _broadcast_to(self, x, shape): - if x.shape == shape: - return x - if x.shape[1:] == shape[1:]: - return np.broadcast_to(x, shape) - return x * np.ones(shape) * np.eye(shape[1], shape[2])[None, ...] - - -class MultiLinearModel(object): - """A multilinear model. - - D|theta ~ N( m + M theta, C ) - theta ~ N( mu, Sigma ) - - Defined by: - Parameters: theta (k, n,) - Data: D (k, d,) - Prior mean: mu (k, n,) - Prior covariance: Sigma (k, n, n) - Data mean: m (k, d,) - Data covariance: C (k, d, d) - - i.e. the same as a LinearModel, but with k copies of each parameter. - Fully vectorised so k may in principle be very large. - - Parameters - ---------- - M : array_like, optional - if ndim==3: model matrices - if ndim==2: model matrix with same matrix for all components - if ndim==1: model matrix with vector diagonal for all components - if scalar: scalar * rectangular identity matrix for all components - Defaults to k copies of rectangular identity matrices - m : array_like, optional - if ndim==2: data means - if ndim==1: data mean with same vector for all components - if scalar: scalar * unit vector for all components - Defaults to 0 for all components - C : array_like, optional - if ndim==3: data covariances - if ndim==2: data covariance with same matrix for all components - if ndim==1: data covariance with vector diagonal for all components - if scalar: scalar * identity matrix for all components - Defaults to k copies of identity matrices - mu : array_like, optional - if ndim==2: prior means - if ndim==1: prior mean with same vector for all components - if scalar: scalar * unit vector for all components - Defaults to 0 for all components - Prior mean, defaults to zero vector - Sigma : array_like, optional - if ndim==3: prior covariances - if ndim==2: prior covariance with same matrix for all components - if ndim==1: prior covariance with vector diagonal for all components - if scalar: scalar * identity matrix for all components - Defaults to k copies of identity matrices - n : int, optional - Number of parameters, defaults to automatically inferred value - d : int, optional - Number of data dimensions, defaults to automatically inferred value - k : int, optional - Number of mixture components, defaults to automatically inferred value - """ - - def __init__(self, *args, **kwargs): - # Rationalise input arguments - M = self._atleast_3d(kwargs.pop("M", None)) - m = self._atleast_2d(kwargs.pop("m", None)) - C = self._atleast_3d(kwargs.pop("C", None)) - mu = self._atleast_2d(kwargs.pop("mu", None)) - Sigma = self._atleast_3d(kwargs.pop("Sigma", None)) - n = kwargs.pop("n", 0) - d = kwargs.pop("d", 0) - k = kwargs.pop("k", 0) - - # Determine dimensions - n = max([n, M.shape[2], mu.shape[1], Sigma.shape[1], Sigma.shape[2]]) - d = max([d, M.shape[1], m.shape[1], C.shape[1], C.shape[2]]) - k = max( - [ - k, - M.shape[0], - m.shape[0], - C.shape[0], - mu.shape[0], - Sigma.shape[0], - ] - ) - if not n: - raise ValueError("Unable to determine number of parameters n") - if not d: - raise ValueError("Unable to determine data dimensions d") - - # Set defaults if no argument was passed - M = M if M.size else np.eye(d, n) - m = m if m.size else np.zeros(d) - C = C if C.size else np.eye(d) - mu = mu if mu.size else np.zeros(n) - Sigma = Sigma if Sigma.size else np.eye(n) - - # Broadcast to correct shape - self.M = self._broadcast_to(M, (k, d, n)) - self.m = np.broadcast_to(m, (k, d)) - self.C = 
self._broadcast_to(C, (k, d, d)) - self.mu = np.broadcast_to(mu, (k, n)) - self.Sigma = self._broadcast_to(Sigma, (k, n, n)) - - @classmethod - def from_joint(cls, means, covs, n): - """Construct model from joint distribution.""" - mu = means[:, -n:] - Sigma = covs[:, -n:, -n:] - M = solve(Sigma, covs[:, -n:, :-n]).transpose(0, 2, 1) - m = means[:, :-n] - np.einsum("ija,ia->ij", M, mu) - C = covs[:, :-n, :-n] - np.einsum("ija,iab,ikb->ijk", M, Sigma, M) - return cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma) - - @property - def n(self): - """Dimensionality of parameter space len(theta).""" - return self.M.shape[2] - - @property - def d(self): - """Dimensionality of data space len(D).""" - return self.M.shape[1] - - @property - def k(self): - """Number of copies.""" - return self.M.shape[0] - - def likelihood(self, theta): - """P(D|theta) as a scipy distribution object. - - D|theta ~ N( m + M theta, C ) - theta ~ N( mu, Sigma ) - - Parameters - ---------- - theta : array_like, shape (k, n) - """ - theta = np.array(theta).reshape(self.k, self.n) - mu = self.m + np.einsum("ija,ia->ij", self.M, theta) - return multimultivariate_normal(mu, self.C) - - def prior(self): - """P(theta) as a scipy distribution object. - - theta ~ N( mu, Sigma ) - """ - return multimultivariate_normal(self.mu, self.Sigma) - - def posterior(self, D): - """P(theta|D) as a scipy distribution object. - - theta|D ~ N( mu + S M'C^{-1}(D - m - M mu), S ) - D ~ N( m + M mu, C + M Sigma M' ) - S = (Sigma^{-1} + M'C^{-1}M)^{-1} - - Parameters - ---------- - D : array_like, shape (d,) - """ - D = D.reshape(self.k, self.d) - Sigma = inv( - inv(self.Sigma) + np.einsum("iaj,iab,ibk->ijk", self.M, inv(self.C), self.M) - ) - D0 = self.m + np.einsum("ija,ia->ij", self.M, self.mu) - mu = self.mu + np.einsum( - "ija,iba,ibc,ic->ij", Sigma, self.M, inv(self.C), D - D0 - ) - return multimultivariate_normal(mu, Sigma) - - def evidence(self): - """P(D) as a scipy distribution object. - - D|A ~ N( m + M mu, C + M Sigma M' ) - """ - mu = self.m + np.einsum("ija,ia->ij", self.M, self.mu) - Sigma = self.C + np.einsum("ija,iab,ikb->ijk", self.M, self.Sigma, self.M) - return multimultivariate_normal(mu, Sigma) - - def joint(self): - """P(D, theta) as a scipy distribution object. - - [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) - [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) - """ - evidence = self.evidence() - prior = self.prior() - mu = np.block([evidence.means, prior.means]) - corr = np.einsum("ija,ial->ijl", self.M, self.Sigma) - Sigma = np.block([[evidence.covs, corr], [corr.transpose(0, 2, 1), prior.covs]]) - return multimultivariate_normal(mu, Sigma) - - def _atleast_3d(self, x): - if x is None: - return np.zeros(shape=(0, 0, 0)) - x = np.array(x) - if x.ndim == 3: - return x - return np.atleast_2d(x)[None, ...] - - def _atleast_2d(self, x): - if x is None: - return np.zeros(shape=(0, 0)) - x = np.array(x) - if x.ndim == 2: - return x - return np.atleast_1d(x)[None, ...] - - def _broadcast_to(self, x, shape): - if x.shape == shape: - return x - if x.shape[1:] == shape[1:]: - return np.broadcast_to(x, shape) - return x * np.ones(shape) * np.eye(shape[1], shape[2])[None, ...] 
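The renamed `LinearModel` above keeps the analytic posterior update theta|D ~ N( mu + S M'C^{-1}(D - m - M mu), S ) with S = (Sigma^{-1} + M'C^{-1}M)^{-1}. A minimal sketch of that update against the class API, with the sizes, seed, and variable names below chosen purely for illustration:

    import numpy as np
    from numpy.linalg import inv
    from lsbi.model import LinearModel

    rng = np.random.default_rng(0)
    n, d = 3, 2
    M = rng.standard_normal((d, n))
    m, mu = np.zeros(d), np.zeros(n)
    C, Sigma = np.eye(d), np.eye(n)
    D = rng.standard_normal(d)

    # closed-form update: S = (Sigma^-1 + M' C^-1 M)^-1
    S = inv(inv(Sigma) + M.T @ inv(C) @ M)
    post_mean = mu + S @ M.T @ inv(C) @ (D - m - M @ mu)

    model = LinearModel(M=M, m=m, C=C, mu=mu, Sigma=Sigma)
    # should agree with the dense (non-diagonal) branch of posterior() above
    assert np.allclose(model.posterior(D).mean, post_mean)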
diff --git a/lsbi/model_1.py b/lsbi/model_1.py deleted file mode 100644 index c39b624..0000000 --- a/lsbi/model_1.py +++ /dev/null @@ -1,544 +0,0 @@ -"""Gaussian models for linear Bayesian inference.""" -import numpy as np -from numpy.linalg import inv, solve - -from lsbi.stats_1 import mixture_normal, multivariate_normal -from lsbi.utils import logdet, matrix - - -def _de_diagonalise(x, diagonal, *args): - if diagonal: - return np.atleast_1d(x)[..., None, :] * np.eye(*args) - else: - return x - - -class LinearModel(object): - """A multilinear model. - - D|theta ~ N( m + M theta, C ) - theta ~ N( mu, Sigma ) - - Defined by: - Parameters: theta (..., n,) - Data: D (..., d,) - Model: M (..., d, n) - Prior mean: mu (..., n,) - Prior covariance: Sigma (..., n, n) - Data mean: m (..., d,) - Data covariance: C (..., d, d) - - where the ellipses indicate arbitrary (broadcastable) additional copies. - - Parameters - ---------- - M : array_like, optional - if ndim>=2: model matrices - if ndim==1: model matrix with vector diagonal for all components - if ndim==0: scalar * rectangular identity matrix for all components - Defaults to rectangular identity matrix - m : array_like, optional - if ndim>=1: data means - if ndim==0: scalar * unit vector for all components - Defaults to 0 for all components - C : array_like, optional - if ndim>=2: data covariances - if ndim==1: data covariance with vector diagonal for all components - if ndim==0: scalar * identity matrix for all components - Defaults to rectangular identity matrix - mu : array_like, optional - if ndim>=1: prior means - if ndim==0: scalar * unit vector for all components - Defaults to 0 for all components - Prior mean, defaults to zero vector - Sigma : array_like, optional - if ndim>=2: prior covariances - if ndim==1: prior covariance with vector diagonal for all components - if ndim==0: scalar * identity matrix for all components - Defaults to k copies of identity matrices - n : int, optional - Number of parameters, defaults to automatically inferred value - d : int, optional - Number of data dimensions, defaults to automatically inferred value - shape : (), optional - Number of mixture components, defaults to automatically inferred value - """ - - def __init__( - self, - M=1, - m=0, - C=1, - mu=0, - Sigma=1, - shape=(), - n=1, - d=1, - diagonal_M=False, - diagonal_C=False, - diagonal_Sigma=False, - ): - self.M = M - self.diagonal_M = diagonal_M - if len(np.shape(self.M)) < 2: - self.diagonal_M = True - self.m = m - self.C = C - self.diagonal_C = diagonal_C - if len(np.shape(self.C)) < 2: - self.diagonal_C = True - self.mu = mu - self.Sigma = Sigma - self.diagonal_Sigma = diagonal_Sigma - if len(np.shape(self.Sigma)) < 2: - self.diagonal_Sigma = True - self._shape = shape - self._n = n - self._d = d - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes( - np.shape(self.M)[: -2 + self.diagonal_M], - np.shape(self.m)[:-1], - np.shape(self.C)[: -2 + self.diagonal_C], - np.shape(self.mu)[:-1], - np.shape(self.Sigma)[: -2 + self.diagonal_Sigma], - self._shape, - ) - - @property - def n(self): - """Dimension of the distribution.""" - return np.max( - [ - *np.shape(self.M)[len(np.shape(self.M)) - 1 + self.diagonal_M :], - *np.shape(self.Sigma)[-2 + self.diagonal_Sigma :], - *np.shape(self.mu)[-1:], - self._n, - ] - ) - - @property - def d(self): - """Dimensionality of data space len(D).""" - return np.max( - [ - *np.shape(self.M)[-2 + self.diagonal_M : -1], - *np.shape(self.C)[-2 + self.diagonal_C :], - 
*np.shape(self.m)[-1:], - self._d, - ] - ) - - def likelihood(self, theta): - """P(D|theta) as a scipy distribution object. - - D|theta ~ N( m + M theta, C ) - theta ~ N( mu, Sigma ) - - Parameters - ---------- - theta : array_like, shape (k, n) - """ - mu = self.m + np.einsum("...ja,...a->...j", self._M, theta) - return multivariate_normal(mu, self.C, self.shape, self.d, self.diagonal_C) - - def prior(self): - """P(theta) as a scipy distribution object. - - theta ~ N( mu, Sigma ) - """ - return multivariate_normal( - self.mu, self.Sigma, self.shape, self.n, self.diagonal_Sigma - ) - - def posterior(self, D): - """P(theta|D) as a scipy distribution object. - - theta|D ~ N( mu + S M'C^{-1}(D - m - M mu), S ) - S = (Sigma^{-1} + M'C^{-1}M)^{-1} - - Parameters - ---------- - D : array_like, shape (d,) - """ - values = ( - D - - self.m - - np.einsum("...ja,...a->...j", self._M, self.mu * np.ones(self.n)) - ) - - diagonal_Sigma = self.diagonal_C and self.diagonal_Sigma and self.diagonal_M - - if diagonal_Sigma: - dim = min(self.n, self.d) - shape = np.broadcast_shapes(self.shape, values.shape[:-1]) - C = np.atleast_1d(self.C)[..., :dim] - M = np.atleast_1d(self.M)[..., :dim] - Sigma = np.broadcast_to(self.Sigma, shape + (self.n,)).copy() - Sigma[..., :dim] = 1 / (1 / Sigma[..., :dim] + M**2 / C) - - mu = np.broadcast_to(self.mu, shape + (self.n,)).copy() - mu[..., :dim] = mu[..., :dim] + Sigma[..., :dim] * M / C * values[..., :dim] - else: - if self.diagonal_C: - invC = np.eye(self.d) / np.atleast_1d(self.C)[..., None, :] - else: - invC = inv(self.C) - - if self.diagonal_Sigma: - invSigma = np.eye(self.n) / np.atleast_1d(self.Sigma)[..., None, :] - else: - invSigma = inv(self.Sigma) - - Sigma = inv( - invSigma + np.einsum("...aj,...ab,...bk->...jk", self._M, invC, self._M) - ) - mu = self.mu + np.einsum( - "...ja,...ba,...bc,...c->...j", Sigma, self._M, invC, values - ) - - return multivariate_normal(mu, Sigma, self.shape, self.n, diagonal_Sigma) - - def evidence(self): - """P(D) as a scipy distribution object. - - D ~ N( m + M mu, C + M Sigma M' ) - """ - mu = self.m + np.einsum("...ja,...a->...j", self._M, self.mu * np.ones(self.n)) - diagonal_Sigma = self.diagonal_C and self.diagonal_Sigma and self.diagonal_M - - if diagonal_Sigma: - dim = min(self.n, self.d) - M = np.atleast_1d(self.M)[..., :dim] - S = np.atleast_1d(self.Sigma)[..., :dim] - Sigma = np.broadcast_to(self.C, self.shape + (self.d,)).copy() - Sigma[..., :dim] += S * M**2 - else: - Sigma = self._C + np.einsum( - "...ja,...ab,...kb->...jk", self._M, self._Sigma, self._M - ) - - return multivariate_normal(mu, Sigma, self.shape, self.d, diagonal_Sigma) - - def joint(self): - """P(D, theta) as a scipy distribution object. 
- - [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) - [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) - """ - evidence = self.evidence() - prior = self.prior() - a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) - b = np.broadcast_to(prior.mean, self.shape + (self.n,)) - mu = np.block([a, b]) - A = _de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) - A = np.broadcast_to(A, self.shape + (self.d, self.d)) - D = _de_diagonalise(prior.cov, prior.diagonal_cov, self.n) - D = np.broadcast_to(D, self.shape + (self.n, self.n)) - B = np.einsum("...ja,...al->...jl", self._M, self._Sigma) - B = np.broadcast_to(B, self.shape + (self.d, self.n)) - C = np.moveaxis(B, -1, -2) - Sigma = np.block([[A, B], [C, D]]) - return multivariate_normal(mu, Sigma, self.shape, self.n + self.d) - - @property - def _M(self): - return _de_diagonalise(self.M, self.diagonal_M, self.d, self.n) - - @property - def _C(self): - return _de_diagonalise(self.C, self.diagonal_C, self.d) - - @property - def _Sigma(self): - return _de_diagonalise(self.Sigma, self.diagonal_Sigma, self.n) - - -class MixtureModel(LinearModel): - """A linear mixture model. - - D|theta, A ~ N( m + M theta, C ) - theta|A ~ N( mu, Sigma ) - A ~ categorical( exp(logA) ) - - Defined by: - Parameters: theta (..., n,) - Data: D (..., d,) - Prior means: mu (..., k, n) - Prior covariances: Sigma (..., k, n, n) - Data means: m (..., k, d) - Data covariances: C (..., k, d, d) - log mixture weights: logA (..., k,) - - Parameters - ---------- - M : array_like, optional - if ndim>=2: model matrices - if ndim==1: model matrix with vector diagonal for all components - if scalar: scalar * rectangular identity matrix for all components - Defaults to k copies of rectangular identity matrices - m : array_like, optional - if ndim>=1: data means - if scalar: scalar * unit vector for all components - Defaults to 0 for all components - C : array_like, optional - if ndim>=2: data covariances - if ndim==1: data covariance with vector diagonal for all components - if scalar: scalar * identity matrix for all components - Defaults to k copies of identity matrices - mu : array_like, optional - if ndim>=1: prior means - if scalar: scalar * unit vector for all components - Defaults to 0 for all components - Prior mean, defaults to zero vector - Sigma : array_like, optional - if ndim>=2: prior covariances - if ndim==1: prior covariance with vector diagonal for all components - if scalar: scalar * identity matrix for all components - Defaults to k copies of identity matrices - logA : array_like, optional - if ndim>=1: log mixture weights - if scalar: scalar * unit vector - Defaults to uniform weights - n : int, optional - Number of parameters, defaults to automatically inferred value - d : int, optional - Number of data dimensions, defaults to automatically inferred value - """ - - def __init__(self, logA=1, *args): - super().__init__(*args) - self.logA = logA - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes(np.shape(self.logA), super().shape) - - @property - def k(self): - """Number of mixture components of the distribution.""" - return self.shape[-1] - - def likelihood(self, theta): - """P(D|theta) as a scipy distribution object. 
- - D|theta,A ~ N( m + M theta, C ) - theta|A ~ N( mu, Sigma ) - A ~ categorical(exp(logA)) - - Parameters - ---------- - theta : array_like, shape (n,) - """ - dist = super().likelihood(np.expand_dims(theta, -2)) - dist.__class__ = mixture_normal - dist.logA = self.prior().logpdf(theta, broadcast=True, joint=True) - return dist - - def prior(self): - """P(theta) as a scipy distribution object. - - theta|A ~ N( mu, Sigma ) - A ~ categorical(exp(logA)) - """ - dist = super().prior() - dist.__class__ = mixture_normal - dist.logA = self.logA - return dist - - def posterior(self, D): - """P(theta|D) as a scipy distribution object. - - theta|D, A ~ N( mu + S M'C^{-1}(D - m - M mu), S ) - D|A ~ N( m + M mu, C + M Sigma M' ) - A ~ categorical(exp(logA)) - S = (Sigma^{-1} + M'C^{-1}M)^{-1} - - Parameters - ---------- - D : array_like, shape (d,) - """ - dist = super().posterior(np.expand_dims(D, -2)) - dist.__class__ = mixture_normal - dist.logA = self.evidence().logpdf(D, broadcast=True, joint=True) - return dist - - def evidence(self): - """P(D) as a scipy distribution object. - - D|A ~ N( m + M mu, C + M Sigma M' ) - A ~ categorical(exp(logA)) - """ - dist = super().evidence() - dist.__class__ = mixture_normal - dist.logA = self.logA - return dist - - def joint(self): - """P(D, theta) as a scipy distribution object. - - [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) - [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) - - A ~ categorical(exp(logA)) - """ - dist = super().joint() - dist.__class__ = mixture_normal - dist.logA = self.logA - return dist - - -class ReducedLinearModel(object): - """A model with no data. - - If a Likelihood is Gaussian in the parameters, it is sometimes more - clear/efficient to phrase it in terms of a parameter covariance, parameter - mean and peak value: - - logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) - - We can link this to a data-based model with the relations: - - Sigma_L = (M' C^{-1} M)^{-1} - mu_L = Sigma_L M' C^{-1} (D-m) - logLmax = - - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 - - Parameters - ---------- - mu_L : array_like - Likelihood peak - Sigma_L : array_like - Likelihood covariance - logLmax : float, optional - Likelihood maximum, defaults to zero - mu_pi : array_like, optional - Prior mean, defaults to zero vector - Sigma_pi : array_like, optional - Prior covariance, defaults to identity matrix - """ - - def __init__(self, *args, **kwargs): - self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) - self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L", None)) - self.logLmax = kwargs.pop("logLmax", 0) - self.mu_pi = np.atleast_1d(kwargs.pop("mu_pi", np.zeros_like(self.mu_L))) - self.Sigma_pi = np.atleast_2d(kwargs.pop("Sigma_pi", np.eye(len(self.mu_pi)))) - self.Sigma_P = inv(inv(self.Sigma_pi) + inv(self.Sigma_L)) - self.mu_P = self.Sigma_P @ ( - solve(self.Sigma_pi, self.mu_pi) + solve(self.Sigma_L, self.mu_L) - ) - - def prior(self): - """P(theta) as a scipy distribution object.""" - return multivariate_normal(self.mu_pi, self.Sigma_pi) - - def posterior(self): - """P(theta|D) as a scipy distribution object.""" - return multivariate_normal(self.mu_P, self.Sigma_P) - - def logpi(self, theta): - """P(theta) as a scalar.""" - return self.prior().logpdf(theta) - - def logP(self, theta): - """P(theta|D) as a scalar.""" - return self.posterior().logpdf(theta) - - def logL(self, theta): - """P(D|theta) as a scalar.""" - return ( - self.logLmax - + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(theta) - + logdet(2 
* np.pi * self.Sigma_L) / 2 - ) - - def logZ(self): - """P(D) as a scalar.""" - return ( - self.logLmax - + logdet(self.Sigma_P) / 2 - - logdet(self.Sigma_pi) / 2 - - (self.mu_P - self.mu_pi) - @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) - / 2 - - (self.mu_P - self.mu_L) @ solve(self.Sigma_L, self.mu_P - self.mu_L) / 2 - ) - - def DKL(self): - """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" - return ( - logdet(self.Sigma_pi) - - logdet(self.Sigma_P) - + np.trace(inv(self.Sigma_pi) @ self.Sigma_P - 1) - + (self.mu_P - self.mu_pi) @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) - ) / 2 - - -class ReducedLinearModelUniformPrior(object): - """A model with no data. - - Gaussian likelihood in the parameters - - logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) - - Uniform prior - - We can link this to a data-based model with the relations: - - Sigma_L = (M' C^{-1} M)^{-1} - mu_L = Sigma_L M' C^{-1} (D-m) - logLmax = - -log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 - - Parameters - ---------- - mu_L : array_like - Likelihood peak - Sigma_L : array_like - Likelihood covariance - logLmax : float, optional - Likelihood maximum, defaults to zero - logV : float, optional - log prior volume, defaults to zero - """ - - def __init__(self, *args, **kwargs): - self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) - self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L")) - self.logLmax = kwargs.pop("logLmax", 0) - self.logV = kwargs.pop("logV", 0) - self.Sigma_P = self.Sigma_L - self.mu_P = self.mu_L - - def posterior(self): - """P(theta|D) as a scipy distribution object.""" - return multivariate_normal(self.mu_P, self.Sigma_P) - - def logpi(self, theta): - """P(theta) as a scalar.""" - return -self.logV - - def logP(self, theta): - """P(theta|D) as a scalar.""" - return self.posterior().logpdf(theta) - - def logL(self, theta): - """P(D|theta) as a scalar.""" - return ( - self.logLmax - + logdet(2 * np.pi * self.Sigma_L) / 2 - + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(theta) - ) - - def logZ(self): - """P(D) as a scalar.""" - return self.logLmax + logdet(2 * np.pi * self.Sigma_P) / 2 - self.logV - - def DKL(self): - """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" - return self.logV - logdet(2 * np.pi * np.e * self.Sigma_P) / 2 diff --git a/lsbi/stats.py b/lsbi/stats.py index 6a5afae..71ec517 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -1,94 +1,141 @@ """Extensions to scipy.stats functions.""" +from copy import deepcopy + import numpy as np import scipy.stats -from numpy.linalg import inv +from numpy.linalg import cholesky, inv from scipy.special import erf, logsumexp -from scipy.stats._multivariate import multivariate_normal_frozen -from lsbi.utils import bisect, logdet +from lsbi.utils import bisect, choice, logdet -class multivariate_normal(multivariate_normal_frozen): # noqa: D101 - def marginalise(self, indices): - """Marginalise over indices. +class multivariate_normal(object): + """Vectorised multivariate normal distribution. + + This extends scipy.stats.multivariate_normal to allow for vectorisation across + the distribution parameters mean and cov. + + Implemented with the same style as scipy.stats.multivariate_normal, except that + results are not squeezed. + + mean and cov are lazily broadcasted to the same shape to improve performance. + + Parameters + ---------- + mean : array_like, shape (..., dim) + Mean of each component. + + cov array_like, shape (..., dim, dim) + Covariance matrix of each component. 
+ + shape: tuple, optional, default=() + Shape of the distribution. Useful for forcing a broadcast beyond that + inferred by mean and cov shapes + + dim: int, optional, default=0 + Dimension of the distribution. Useful for forcing a broadcast beyond that + inferred by mean and cov shapes + + diagonal_cov: bool, optional, default=False + If True, cov is interpreted as the diagonal of the covariance matrix. + """ + + def __init__(self, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): + self.mean = mean + self.cov = cov + self._shape = shape + self._dim = dim + self.diagonal_cov = diagonal_cov + if len(np.shape(self.cov)) < 2: + self.diagonal_cov = True + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + np.shape(self.mean)[:-1], + np.shape(self.cov)[: -2 + self.diagonal_cov], + self._shape, + ) + + @property + def dim(self): + """Dimension of the distribution.""" + return np.max( + [ + *np.shape(self.mean)[-1:], + *np.shape(self.cov)[-2 + self.diagonal_cov :], + self._dim, + ] + ) + + def logpdf(self, x, broadcast=False): + """Log of the probability density function. Parameters ---------- - indices : array_like - Indices to marginalise. + x : array_like, shape (*size, dim) + Points at which to evaluate the log of the probability density + function. + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. Returns ------- - marginalised distribution: multivariate_normal + logpdf : array_like, shape (*size, *shape) + Log of the probability density function evaluated at x. """ - i = self._bar(indices) - mean = self.mean[i] - cov = self.cov[i][:, i] - return multivariate_normal(mean, cov) + x = np.array(x) + if broadcast: + dx = x - self.mean + else: + size = x.shape[:-1] + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) + dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean + if self.diagonal_cov: + chi2 = (dx**2 / self.cov).sum(axis=-1) + norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum(axis=-1) / 2 + else: + chi2 = np.einsum("...j,...jk,...k->...", dx, inv(self.cov), dx) + norm = -logdet(2 * np.pi * self.cov) / 2 + return norm - chi2 / 2 - def condition(self, indices, values): - """Condition on indices with values. + def pdf(self, x): + """Probability density function. Parameters ---------- - indices : array_like - Indices to condition over. - values : array_like - Values to condition on. + x : array_like, shape (*size, dim) + Points at which to evaluate the probability density function. Returns ------- - conditional distribution: multivariate_normal + pdf : array_like, shape (*size, *shape) + Probability density function evaluated at x. """ - i = self._bar(indices) - k = indices - mean = self.mean[i] + self.cov[i][:, k] @ inv(self.cov[k][:, k]) @ ( - values - self.mean[k] - ) - cov = ( - self.cov[i][:, i] - - self.cov[i][:, k] @ inv(self.cov[k][:, k]) @ self.cov[k][:, i] - ) - return multivariate_normal(mean, cov) + return np.exp(self.logpdf(x)) - def _bar(self, indices): - """Return the indices not in the given indices.""" - k = np.ones(len(self.mean), dtype=bool) - k[indices] = False - return k - - def bijector(self, x, inverse=False): - """Bijector between U([0, 1])^d and the distribution. - - - x in [0, 1]^d is the hypercube space. - - theta in R^d is the physical space. - - Computes the transformation from x to theta or theta to x depending on - the value of inverse. + def rvs(self, size=()): + """Draw random samples from the distribution. 
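# Usage sketch of the vectorised multivariate_normal introduced above, assuming
# the lsbi.stats API as patched here; the shapes and numbers are illustrative.
# A (2, 3) batch of 4-dimensional Gaussians sharing a diagonal covariance:
import numpy as np
from lsbi.stats import multivariate_normal

mean = np.random.randn(2, 3, 4)        # shape (..., dim) = (2, 3, 4)
cov = np.ones(4)                       # 1-d cov => diagonal_cov is set automatically
dist = multivariate_normal(mean, cov)
assert dist.shape == (2, 3) and dist.dim == 4

x = np.random.randn(100, 4)            # 100 points in the 4-dimensional space
logp = dist.logpdf(x)                  # evaluated under every member: shape (100, 2, 3)
samples = dist.rvs(100)                # shape (100, 2, 3, 4)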
Parameters ---------- - x : array_like, shape (..., d) - if inverse: x is theta - else: x is x - inverse : bool, optional, default=False - If True: compute the inverse transformation from physical to - hypercube space. + size : int or tuple of ints, optional, default=() + Number of samples to draw. Returns ------- - transformed x or theta: array_like, shape (..., d) + rvs : ndarray, shape (*size, *shape, dim) + Random samples from the distribution. """ - L = np.linalg.cholesky(self.cov) - if inverse: - Linv = inv(L) - y = np.einsum("ij,...j->...i", Linv, x - self.mean) - return scipy.stats.norm.cdf(y) + size = np.atleast_1d(size) + x = np.random.randn(*size, *self.shape, self.dim) + if self.diagonal_cov: + return self.mean + np.sqrt(self.cov) * x else: - y = scipy.stats.norm.ppf(x) - return self.mean + np.einsum("ij,...j->...i", L, y) + return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) - def predict(self, A, b=None): + def predict(self, A=1, b=0, diagonal_A=False): """Predict the mean and covariance of a linear transformation. if: x ~ N(mu, Sigma) @@ -96,70 +143,43 @@ def predict(self, A, b=None): Parameters ---------- - A : array_like, shape (q, n) + A : array_like, shape (..., k, dim) Linear transformation matrix. - b : array_like, shape (q,), optional + b : array_like, shape (..., k), optional Linear transformation vector. + where self.shape is broadcastable to ... + Returns ------- - predicted distribution: multivariate_normal + transformed distribution shape (..., k) """ - if b is None: - b = np.zeros(A.shape[0]) - mean = A @ self.mean + b - cov = A @ self.cov @ A.T - return multivariate_normal(mean, cov, allow_singular=True) - - -class multimultivariate_normal(object): - """Multivariate normal distribution with multiple means and covariances. - - Implemented with the same style as scipy.stats.multivariate_normal - - Parameters - ---------- - means : array_like, shape (n_components, n_features) - Mean of each component. - - covs: array_like, shape (n_components, n_features, n_features) - Covariance matrix of each component. 
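# Sketch of predict(): if x ~ N(mu, Sigma) then A x + b ~ N(A mu + b, A Sigma A'),
# assuming the lsbi.stats API as patched here; A and b below are illustrative.
import numpy as np
from lsbi.stats import multivariate_normal

dist = multivariate_normal(mean=np.zeros(3), cov=np.eye(3))
A = np.random.randn(2, 3)              # maps dim 3 -> dim 2
b = np.random.randn(2)
pred = dist.predict(A, b)
assert pred.dim == 2
np.testing.assert_allclose(pred.mean, A @ dist.mean + b)
np.testing.assert_allclose(pred.cov, A @ dist.cov @ A.T)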
- - """ - - def __init__(self, means, covs): - self.means = np.array([np.atleast_1d(m) for m in means]) - self.covs = np.array([np.atleast_2d(c) for c in covs]) - - def _process_quantiles(self, x, dim): - x = np.asarray(x, dtype=float) - - if x.ndim == 0: - x = x[np.newaxis, np.newaxis] - elif x.ndim == 1: - if dim == 1: - x = x[:, np.newaxis] + if len(np.shape(A)) < 2: + diagonal_A = True + dist = deepcopy(self) + if diagonal_A: + dist.mean = A * self.mean + b + if self.diagonal_cov: + dist.cov = A * self.cov * A else: - x = x[np.newaxis, :] - - return x - - def logpdf(self, x): - """Log of the probability density function.""" - x = self._process_quantiles(x, self.means.shape[-1]) - dx = self.means - x[..., :, :] - invcovs = np.linalg.inv(self.covs) - chi2 = np.einsum("...ij,ijk,...ik->...i", dx, invcovs, dx) - norm = -logdet(2 * np.pi * self.covs) / 2 - logpdf = norm - chi2 / 2 - return np.squeeze(logpdf) - - def rvs(self, size=1): - """Random variates.""" - size = np.atleast_1d(size) - x = np.random.randn(*size, *self.means.shape) - choleskys = np.linalg.cholesky(self.covs) - return np.squeeze(self.means + np.einsum("ijk,...ik->...ij", choleskys, x)) + dist.cov = ( + self.cov + * np.atleast_1d(A)[..., None] + * np.atleast_1d(A)[..., None, :] + ) + else: + dist.mean = ( + np.einsum("...qn,...n->...q", A, np.ones(self.dim) * self.mean) + b + ) + if self.diagonal_cov: + dist.cov = np.einsum( + "...qn,...pn->...qp", A, A * np.atleast_1d(self.cov)[..., None, :] + ) + dist.diagonal_cov = False + else: + dist.cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) + dist._dim = np.shape(A)[-2] + return dist def marginalise(self, indices): """Marginalise over indices. @@ -171,12 +191,19 @@ def marginalise(self, indices): Returns ------- - marginalised distribution: multimultivariate_normal + marginalised distribution, shape (*shape, dim - len(indices)) """ + dist = deepcopy(self) i = self._bar(indices) - means = self.means[:, i] - covs = self.covs[:, i][:, :, i] - return multimultivariate_normal(means, covs) + dist.mean = (np.ones(self.dim) * self.mean)[..., i] + + if self.diagonal_cov: + dist.cov = (np.ones(self.dim) * self.cov)[..., i] + else: + dist.cov = self.cov[..., i, :][..., i] + + dist._dim = sum(i) + return dist def condition(self, indices, values): """Condition on indices with values. @@ -185,33 +212,42 @@ def condition(self, indices, values): ---------- indices : array_like Indices to condition over. - values : array_like + values : array_like shape (..., len(indices)) Values to condition on. + where where self.shape is broadcastable to ... 
+ Returns ------- - conditional distribution: multimultivariate_normal + conditioned distribution shape (..., len(indices)) """ i = self._bar(indices) k = indices - values = values.reshape(self.means[:, k].shape) - means = self.means[:, i] + np.einsum( - "ija,iab,ib->ij", - self.covs[:, i][:, :, k], - inv(self.covs[:, k][:, :, k]), - (values - self.means[:, k]), - ) - covs = self.covs[:, i][:, :, i] - np.einsum( - "ija,iab,ibk->ijk", - self.covs[:, i][:, :, k], - inv(self.covs[:, k][:, :, k]), - self.covs[:, k][:, :, i], - ) - return multimultivariate_normal(means, covs) + dist = deepcopy(self) + dist.mean = (np.ones(self.dim) * self.mean)[..., i] + + if self.diagonal_cov: + dist.cov = (np.ones(self.dim) * self.cov)[..., i] + dist._shape = np.broadcast_shapes(self.shape, values.shape[:-1]) + else: + dist.mean = dist.mean + np.einsum( + "...ja,...ab,...b->...j", + self.cov[..., i, :][..., :, k], + inv(self.cov[..., k, :][..., :, k]), + values - (np.ones(self.dim) * self.mean)[..., k], + ) + dist.cov = self.cov[..., i, :][..., :, i] - np.einsum( + "...ja,...ab,...bk->...jk", + self.cov[..., i, :][..., :, k], + inv(self.cov[..., k, :][..., :, k]), + self.cov[..., k, :][..., :, i], + ) + dist._dim = sum(i) + return dist def _bar(self, indices): """Return the indices not in the given indices.""" - k = np.ones(self.means.shape[-1], dtype=bool) + k = np.ones(self.dim, dtype=bool) k[indices] = False return k @@ -226,114 +262,178 @@ def bijector(self, x, inverse=False): Parameters ---------- - x : array_like, shape (..., d) + x : array_like, shape (..., dim) if inverse: x is theta else: x is x inverse : bool, optional, default=False If True: compute the inverse transformation from physical to hypercube space. + where self.shape is broadcastable to ... + Returns ------- - transformed x or theta: array_like, shape (..., d) + transformed x or theta: array_like, shape (..., dim) """ - Ls = np.linalg.cholesky(self.covs) + x = np.array(x) + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) if inverse: - Linvs = inv(Ls) - y = np.einsum("ijk,...ik->...ij", Linvs, x - self.means) + if self.diagonal_cov: + y = (x - mean) / np.sqrt(self.cov) + else: + y = np.einsum("...jk,...k->...j", inv(cholesky(self.cov)), x - mean) return scipy.stats.norm.cdf(y) else: y = scipy.stats.norm.ppf(x) - return self.means + np.einsum("ijk,...ik->...ij", Ls, y) - - def predict(self, A, b=None): - """Predict the mean and covariance of a linear transformation. + if self.diagonal_cov: + return mean + np.sqrt(self.cov) * y + else: + L = cholesky(self.cov) + return mean + np.einsum("...jk,...k->...j", L, y) - if: x ~ N(mu, Sigma) - then: Ax + b ~ N(A mu + b, A Sigma A^T) + def __getitem__(self, arg): + """Access a subset of the distributions. Parameters ---------- - A : array_like, shape (k, q, n) - Linear transformation matrix. - b : array_like, shape (k, q), optional - Linear transformation vector. + arg : int or slice or tuple of ints or tuples + Indices to access. 
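# Usage sketch for marginalise, condition and the bijector of the vectorised
# Gaussian above, assuming the lsbi.stats API introduced in this patch; the
# covariance and values below are illustrative only.
import numpy as np
from lsbi.stats import multivariate_normal

cov = np.array([[2.0, 0.5, 0.0],
                [0.5, 1.0, 0.3],
                [0.0, 0.3, 1.5]])
dist = multivariate_normal(mean=np.zeros(3), cov=cov)

marg = dist.marginalise([2])                   # distribution over the first two coordinates
assert marg.dim == 2

cond = dist.condition([0], np.array([1.0]))    # remaining coordinates given x_0 = 1
assert cond.dim == 2

u = np.random.rand(5, 3)                       # points in the unit hypercube
theta = dist.bijector(u)                       # mapped to physical space, shape (5, 3)
np.testing.assert_allclose(dist.bijector(theta, inverse=True), u)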
Returns ------- - predicted distribution: mixture_multivariate_normal + dist : distribution + A subset of the distribution + + Examples + -------- + >>> dist = multivariate_normal(shape=(2, 3), dim=4) + >>> dist.shape + (2, 3) + >>> dist.dim + 4 + >>> dist[0].shape + (3,) + >>> dist[0, 0].shape + () + >>> dist[:, 0].shape + (2,) """ - if b is None: - b = np.zeros(A.shape[:-1]) - means = np.einsum("kqn,kn->kq", A, self.means) + b - covs = np.einsum("kpn,knm,kqm->kpq", A, self.covs, A) - return multimultivariate_normal(means, covs) + dist = deepcopy(self) + dist.mean = np.broadcast_to(self.mean, (*self.shape, self.dim))[arg] + if self.diagonal_cov: + dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim))[arg] + else: + dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim, self.dim))[arg] + dist._shape = dist.mean.shape[:-1] + dist._dim = dist.mean.shape[-1] + return dist -class mixture_multivariate_normal(object): +class mixture_normal(multivariate_normal): """Mixture of multivariate normal distributions. - Implemented with the same style as scipy.stats.multivariate_normal + Broadcastable multivariate mixture model. Parameters ---------- - means : array_like, shape (n_components, n_features) + mean : array_like, shape (..., n, dim) Mean of each component. - covs: array_like, shape (n_components, n_features, n_features) + cov: array_like, shape (..., n, dim, dim) Covariance matrix of each component. - logA: array_like, shape (n_components,) + logA: array_like, shape (..., n) Log of the mixing weights. + + shape: tuple, optional, default=() + Shape of the distribution. Useful for forcing a broadcast beyond that + inferred by mean and cov shapes + + dim: int, optional, default=0 + Dimension of the distribution. Useful for forcing a broadcast beyond that + inferred by mean and cov shapes + + diagonal_cov: bool, optional, default=False + If True, cov is interpreted as the diagonal of the covariance matrix. """ - def __init__(self, means, covs, logA): - self.means = np.array([np.atleast_1d(m) for m in means]) - self.covs = np.array([np.atleast_2d(c) for c in covs]) - self.logA = np.atleast_1d(logA) - - def logpdf(self, x, reduce=True, keepdims=False): - """Log of the probability density function.""" - x = self._process_quantiles(x, self.means.shape[-1]) - dx = self.means - x[..., None, :] - invcovs = np.linalg.inv(self.covs) - chi2 = np.einsum("...ij,ijk,...ik->...i", dx, invcovs, dx) - norm = -logdet(2 * np.pi * self.covs) / 2 - logpdf = norm - chi2 / 2 - if reduce: - logA = self.logA - scipy.special.logsumexp(self.logA) - logpdf = np.squeeze(scipy.special.logsumexp(logpdf + logA, axis=-1)) - if not keepdims: - logpdf = np.squeeze(logpdf) - return logpdf - - def rvs(self, size=1): - """Random variates.""" - size = np.atleast_1d(size) - p = np.exp(self.logA - self.logA.max()) - p /= p.sum() - i = np.random.choice(len(p), size, p=p) - x = np.random.randn(*size, self.means.shape[-1]) - choleskys = np.linalg.cholesky(self.covs) - return np.squeeze(self.means[i, ..., None] + choleskys[i] @ x[..., None]) + def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): + self.logA = logA + super().__init__(mean, cov, shape, dim, diagonal_cov) - def marginalise(self, indices): - """Marginalise over indices. 
+ @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes(np.shape(self.logA), super().shape) + + @property + def k(self): + """Number of components.""" + if self.shape == (): + return 1 + return self.shape[-1] + + def logpdf(self, x, broadcast=False, joint=False): + """Log of the probability density function. Parameters ---------- - indices : array_like - Indices to marginalise. + x : array_like, shape (*size, dim) + Points at which to evaluate the log of the probability density + function. + + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. Returns ------- - marginalised distribution: mixture_multivariate_normal + logpdf : array_like, shape (*size, *shape[:-1]) + Log of the probability density function evaluated at x. """ - i = self._bar(indices) - means = self.means[:, i] - covs = self.covs[:, i][:, :, i] - logA = self.logA - return mixture_multivariate_normal(means, covs, logA) + if broadcast: + x = np.expand_dims(x, -2) + logpdf = super().logpdf(x, broadcast=broadcast) + if self.shape == (): + return logpdf + logA = np.broadcast_to(self.logA, self.shape).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + if joint: + return logpdf + logA + return logsumexp(logpdf + logA, axis=-1) + + def rvs(self, size=()): + """Draw random samples from the distribution. + + Parameters + ---------- + size : int or tuple of ints, optional, default=1 + + Returns + ------- + rvs : array_like, shape (*size, *shape[:-1], dim) + """ + if self.shape == (): + return super().rvs(size=size) + size = np.atleast_1d(np.array(size, dtype=int)) + logA = np.broadcast_to(self.logA, self.shape).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + p = np.exp(logA) + cump = np.cumsum(p, axis=-1) + u = np.random.rand(*size, *p.shape[:-1]) + i = np.argmax(np.array(u)[..., None] < cump, axis=-1) + mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) + mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) + x = np.random.randn(*size, *self.shape[:-1], self.dim) + if self.diagonal_cov: + L = np.sqrt(self.cov) + L = np.broadcast_to(L, (*self.shape, self.dim)) + L = np.choose(i[..., None], np.moveaxis(L, -2, 0)) + return mean + L * x + else: + L = cholesky(self.cov) + L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) + L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) + return mean + np.einsum("...ij,...j->...i", L, x) def condition(self, indices, values): """Condition on indices with values. @@ -342,39 +442,20 @@ def condition(self, indices, values): ---------- indices : array_like Indices to condition over. - values : array_like + values : array_like shape (..., len(indices)) Values to condition on. + where self.shape[:-1] is broadcastable to ... 
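# Sketch of the broadcastable mixture above: three equally weighted components
# in two dimensions, assuming lsbi.stats as patched here; numbers illustrative.
import numpy as np
from lsbi.stats import mixture_normal

means = np.array([[-2.0, 0.0], [0.0, 0.0], [2.0, 0.0]])     # (n, dim) = (3, 2)
mix = mixture_normal(logA=np.zeros(3), mean=means, cov=1.0)  # scalar cov => diagonal
assert mix.shape == (3,) and mix.dim == 2 and mix.k == 3

x = mix.rvs(1000)                  # shape (1000, 2); components drawn according to logA
logp = mix.logpdf(x)               # shape (1000,); logsumexp over the three components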
+ Returns ------- - conditional distribution: mixture_multivariate_normal + conditioned distribution, shape (*shape, len(indices)) """ - i = self._bar(indices) - k = indices - marginal = self.marginalise(i) - - means = self.means[:, i] + np.einsum( - "ija,iab,ib->ij", - self.covs[:, i][:, :, k], - inv(self.covs[:, k][:, :, k]), - (values - self.means[:, k]), - ) - covs = self.covs[:, i][:, :, i] - np.einsum( - "ija,iab,ibk->ijk", - self.covs[:, i][:, :, k], - inv(self.covs[:, k][:, :, k]), - self.covs[:, k][:, :, i], - ) - logA = ( - marginal.logpdf(values, reduce=False) + self.logA - marginal.logpdf(values) - ) - return mixture_multivariate_normal(means, covs, logA) - - def _bar(self, indices): - """Return the indices not in the given indices.""" - k = np.ones(self.means.shape[-1], dtype=bool) - k[indices] = False - return k + dist = super().condition(indices, np.expand_dims(values, -2)) + dist.__class__ = mixture_normal + marg = self.marginalise(self._bar(indices)) + dist.logA = marg.logpdf(values, broadcast=True, joint=True) + return dist def bijector(self, x, inverse=False): """Bijector between U([0, 1])^d and the distribution. @@ -394,37 +475,30 @@ def bijector(self, x, inverse=False): If True: compute the inverse transformation from physical to hypercube space. + where self.shape[:-1] is broadcastable to ... + Returns ------- transformed x or theta: array_like, shape (..., d) """ - theta = np.empty_like(x) + x = np.array(x) + theta = np.empty(np.broadcast_shapes(x.shape, self.shape[:-1] + (self.dim,))) + if inverse: theta[:] = x - x = np.empty_like(x) - - for i in range(x.shape[-1]): - m = self.means[..., :, i] + np.einsum( - "ia,iab,...ib->...i", - self.covs[:, i, :i], - inv(self.covs[:, :i, :i]), - theta[..., None, :i] - self.means[:, :i], - ) - c = self.covs[:, i, i] - np.einsum( - "ia,iab,ib->i", - self.covs[:, i, :i], - inv(self.covs[:, :i, :i]), - self.covs[:, i, :i], - ) - dist = mixture_multivariate_normal( - self.means[:, :i], self.covs[:, :i, :i], self.logA - ) - logA = ( - self.logA - + dist.logpdf(theta[..., :i], reduce=False, keepdims=True) - - dist.logpdf(theta[..., :i], keepdims=True)[..., None] + x = np.empty(np.broadcast_shapes(x.shape, self.shape[:-1] + (self.dim,))) + + for i in range(self.dim): + dist = self.marginalise(np.s_[i + 1 :]).condition( + np.s_[:-1], theta[..., :i] ) - A = np.exp(logA - logsumexp(logA, axis=-1)[..., None]) + m = np.atleast_1d(dist.mean)[..., 0] + if dist.diagonal_cov: + c = np.atleast_1d(dist.cov)[..., 0] + else: + c = np.atleast_2d(dist.cov)[..., 0, 0] + A = np.exp(dist.logA - logsumexp(dist.logA, axis=-1)[..., None]) + m = np.broadcast_to(m, dist.shape) def f(t): return (A * 0.5 * (1 + erf((t[..., None] - m) / np.sqrt(2 * c)))).sum( @@ -444,39 +518,7 @@ def f(t): else: return theta - def _process_quantiles(self, x, dim): - x = np.asarray(x, dtype=float) - - if x.ndim == 0: - x = x[np.newaxis, np.newaxis] - elif x.ndim == 1: - if dim == 1: - x = x[:, np.newaxis] - else: - x = x[np.newaxis, :] - - return x - - def predict(self, A, b=None): - """Predict the mean and covariance of a linear transformation. - - if: x ~ mixN(mu, Sigma, logA) - then: Ax + b ~ mixN(A mu + b, A Sigma A^T, logA) - - Parameters - ---------- - A : array_like, shape (k, q, n) - Linear transformation matrix. - b : array_like, shape (k, q,), optional - Linear transformation vector. 
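# Sketch: conditioning a mixture reweights its components through the marginal
# density of the conditioned coordinates, assuming lsbi.stats as patched above.
import numpy as np
from lsbi.stats import mixture_normal

means = np.array([[-2.0, 0.0], [2.0, 0.0]])    # two components in two dimensions
mix = mixture_normal(logA=np.zeros(2), mean=means, cov=1.0)
cond = mix.condition([0], np.array([2.0]))     # condition on x_0 = 2
assert cond.dim == 1
# the component centred at x_0 = +2 now carries almost all of the weight
assert cond.logA[1] > cond.logA[0]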
- - Returns - ------- - predicted distribution: mixture_multivariate_normal - """ - if b is None: - b = np.zeros(A.shape[:-1]) - means = np.einsum("kqn,kn->kq", A, self.means) + b - covs = np.einsum("kqn,knm,kpm->kqp", A, self.covs, A) - logA = self.logA - return mixture_multivariate_normal(means, covs, logA) + def __getitem__(self, arg): # noqa: D105 + dist = super().__getitem__(arg) + dist.logA = np.broadcast_to(self.logA, self.shape)[arg] + return dist diff --git a/lsbi/stats_1.py b/lsbi/stats_1.py deleted file mode 100644 index 71ec517..0000000 --- a/lsbi/stats_1.py +++ /dev/null @@ -1,524 +0,0 @@ -"""Extensions to scipy.stats functions.""" -from copy import deepcopy - -import numpy as np -import scipy.stats -from numpy.linalg import cholesky, inv -from scipy.special import erf, logsumexp - -from lsbi.utils import bisect, choice, logdet - - -class multivariate_normal(object): - """Vectorised multivariate normal distribution. - - This extends scipy.stats.multivariate_normal to allow for vectorisation across - the distribution parameters mean and cov. - - Implemented with the same style as scipy.stats.multivariate_normal, except that - results are not squeezed. - - mean and cov are lazily broadcasted to the same shape to improve performance. - - Parameters - ---------- - mean : array_like, shape (..., dim) - Mean of each component. - - cov array_like, shape (..., dim, dim) - Covariance matrix of each component. - - shape: tuple, optional, default=() - Shape of the distribution. Useful for forcing a broadcast beyond that - inferred by mean and cov shapes - - dim: int, optional, default=0 - Dimension of the distribution. Useful for forcing a broadcast beyond that - inferred by mean and cov shapes - - diagonal_cov: bool, optional, default=False - If True, cov is interpreted as the diagonal of the covariance matrix. - """ - - def __init__(self, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): - self.mean = mean - self.cov = cov - self._shape = shape - self._dim = dim - self.diagonal_cov = diagonal_cov - if len(np.shape(self.cov)) < 2: - self.diagonal_cov = True - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes( - np.shape(self.mean)[:-1], - np.shape(self.cov)[: -2 + self.diagonal_cov], - self._shape, - ) - - @property - def dim(self): - """Dimension of the distribution.""" - return np.max( - [ - *np.shape(self.mean)[-1:], - *np.shape(self.cov)[-2 + self.diagonal_cov :], - self._dim, - ] - ) - - def logpdf(self, x, broadcast=False): - """Log of the probability density function. - - Parameters - ---------- - x : array_like, shape (*size, dim) - Points at which to evaluate the log of the probability density - function. - broadcast : bool, optional, default=False - If True, broadcast x across the distribution parameters. - - Returns - ------- - logpdf : array_like, shape (*size, *shape) - Log of the probability density function evaluated at x. - """ - x = np.array(x) - if broadcast: - dx = x - self.mean - else: - size = x.shape[:-1] - mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) - dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean - if self.diagonal_cov: - chi2 = (dx**2 / self.cov).sum(axis=-1) - norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum(axis=-1) / 2 - else: - chi2 = np.einsum("...j,...jk,...k->...", dx, inv(self.cov), dx) - norm = -logdet(2 * np.pi * self.cov) / 2 - return norm - chi2 / 2 - - def pdf(self, x): - """Probability density function. 
- - Parameters - ---------- - x : array_like, shape (*size, dim) - Points at which to evaluate the probability density function. - - Returns - ------- - pdf : array_like, shape (*size, *shape) - Probability density function evaluated at x. - """ - return np.exp(self.logpdf(x)) - - def rvs(self, size=()): - """Draw random samples from the distribution. - - Parameters - ---------- - size : int or tuple of ints, optional, default=() - Number of samples to draw. - - Returns - ------- - rvs : ndarray, shape (*size, *shape, dim) - Random samples from the distribution. - """ - size = np.atleast_1d(size) - x = np.random.randn(*size, *self.shape, self.dim) - if self.diagonal_cov: - return self.mean + np.sqrt(self.cov) * x - else: - return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) - - def predict(self, A=1, b=0, diagonal_A=False): - """Predict the mean and covariance of a linear transformation. - - if: x ~ N(mu, Sigma) - then: Ax + b ~ N(A mu + b, A Sigma A^T) - - Parameters - ---------- - A : array_like, shape (..., k, dim) - Linear transformation matrix. - b : array_like, shape (..., k), optional - Linear transformation vector. - - where self.shape is broadcastable to ... - - Returns - ------- - transformed distribution shape (..., k) - """ - if len(np.shape(A)) < 2: - diagonal_A = True - dist = deepcopy(self) - if diagonal_A: - dist.mean = A * self.mean + b - if self.diagonal_cov: - dist.cov = A * self.cov * A - else: - dist.cov = ( - self.cov - * np.atleast_1d(A)[..., None] - * np.atleast_1d(A)[..., None, :] - ) - else: - dist.mean = ( - np.einsum("...qn,...n->...q", A, np.ones(self.dim) * self.mean) + b - ) - if self.diagonal_cov: - dist.cov = np.einsum( - "...qn,...pn->...qp", A, A * np.atleast_1d(self.cov)[..., None, :] - ) - dist.diagonal_cov = False - else: - dist.cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) - dist._dim = np.shape(A)[-2] - return dist - - def marginalise(self, indices): - """Marginalise over indices. - - Parameters - ---------- - indices : array_like - Indices to marginalise. - - Returns - ------- - marginalised distribution, shape (*shape, dim - len(indices)) - """ - dist = deepcopy(self) - i = self._bar(indices) - dist.mean = (np.ones(self.dim) * self.mean)[..., i] - - if self.diagonal_cov: - dist.cov = (np.ones(self.dim) * self.cov)[..., i] - else: - dist.cov = self.cov[..., i, :][..., i] - - dist._dim = sum(i) - return dist - - def condition(self, indices, values): - """Condition on indices with values. - - Parameters - ---------- - indices : array_like - Indices to condition over. - values : array_like shape (..., len(indices)) - Values to condition on. - - where where self.shape is broadcastable to ... 
- - Returns - ------- - conditioned distribution shape (..., len(indices)) - """ - i = self._bar(indices) - k = indices - dist = deepcopy(self) - dist.mean = (np.ones(self.dim) * self.mean)[..., i] - - if self.diagonal_cov: - dist.cov = (np.ones(self.dim) * self.cov)[..., i] - dist._shape = np.broadcast_shapes(self.shape, values.shape[:-1]) - else: - dist.mean = dist.mean + np.einsum( - "...ja,...ab,...b->...j", - self.cov[..., i, :][..., :, k], - inv(self.cov[..., k, :][..., :, k]), - values - (np.ones(self.dim) * self.mean)[..., k], - ) - dist.cov = self.cov[..., i, :][..., :, i] - np.einsum( - "...ja,...ab,...bk->...jk", - self.cov[..., i, :][..., :, k], - inv(self.cov[..., k, :][..., :, k]), - self.cov[..., k, :][..., :, i], - ) - dist._dim = sum(i) - return dist - - def _bar(self, indices): - """Return the indices not in the given indices.""" - k = np.ones(self.dim, dtype=bool) - k[indices] = False - return k - - def bijector(self, x, inverse=False): - """Bijector between U([0, 1])^d and the distribution. - - - x in [0, 1]^d is the hypercube space. - - theta in R^d is the physical space. - - Computes the transformation from x to theta or theta to x depending on - the value of inverse. - - Parameters - ---------- - x : array_like, shape (..., dim) - if inverse: x is theta - else: x is x - inverse : bool, optional, default=False - If True: compute the inverse transformation from physical to - hypercube space. - - where self.shape is broadcastable to ... - - Returns - ------- - transformed x or theta: array_like, shape (..., dim) - """ - x = np.array(x) - mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) - if inverse: - if self.diagonal_cov: - y = (x - mean) / np.sqrt(self.cov) - else: - y = np.einsum("...jk,...k->...j", inv(cholesky(self.cov)), x - mean) - return scipy.stats.norm.cdf(y) - else: - y = scipy.stats.norm.ppf(x) - if self.diagonal_cov: - return mean + np.sqrt(self.cov) * y - else: - L = cholesky(self.cov) - return mean + np.einsum("...jk,...k->...j", L, y) - - def __getitem__(self, arg): - """Access a subset of the distributions. - - Parameters - ---------- - arg : int or slice or tuple of ints or tuples - Indices to access. - - Returns - ------- - dist : distribution - A subset of the distribution - - Examples - -------- - >>> dist = multivariate_normal(shape=(2, 3), dim=4) - >>> dist.shape - (2, 3) - >>> dist.dim - 4 - >>> dist[0].shape - (3,) - >>> dist[0, 0].shape - () - >>> dist[:, 0].shape - (2,) - """ - dist = deepcopy(self) - dist.mean = np.broadcast_to(self.mean, (*self.shape, self.dim))[arg] - if self.diagonal_cov: - dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim))[arg] - else: - dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim, self.dim))[arg] - dist._shape = dist.mean.shape[:-1] - dist._dim = dist.mean.shape[-1] - return dist - - -class mixture_normal(multivariate_normal): - """Mixture of multivariate normal distributions. - - Broadcastable multivariate mixture model. - - Parameters - ---------- - mean : array_like, shape (..., n, dim) - Mean of each component. - - cov: array_like, shape (..., n, dim, dim) - Covariance matrix of each component. - - logA: array_like, shape (..., n) - Log of the mixing weights. - - shape: tuple, optional, default=() - Shape of the distribution. Useful for forcing a broadcast beyond that - inferred by mean and cov shapes - - dim: int, optional, default=0 - Dimension of the distribution. 
Useful for forcing a broadcast beyond that - inferred by mean and cov shapes - - diagonal_cov: bool, optional, default=False - If True, cov is interpreted as the diagonal of the covariance matrix. - """ - - def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): - self.logA = logA - super().__init__(mean, cov, shape, dim, diagonal_cov) - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes(np.shape(self.logA), super().shape) - - @property - def k(self): - """Number of components.""" - if self.shape == (): - return 1 - return self.shape[-1] - - def logpdf(self, x, broadcast=False, joint=False): - """Log of the probability density function. - - Parameters - ---------- - x : array_like, shape (*size, dim) - Points at which to evaluate the log of the probability density - function. - - broadcast : bool, optional, default=False - If True, broadcast x across the distribution parameters. - - Returns - ------- - logpdf : array_like, shape (*size, *shape[:-1]) - Log of the probability density function evaluated at x. - """ - if broadcast: - x = np.expand_dims(x, -2) - logpdf = super().logpdf(x, broadcast=broadcast) - if self.shape == (): - return logpdf - logA = np.broadcast_to(self.logA, self.shape).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) - if joint: - return logpdf + logA - return logsumexp(logpdf + logA, axis=-1) - - def rvs(self, size=()): - """Draw random samples from the distribution. - - Parameters - ---------- - size : int or tuple of ints, optional, default=1 - - Returns - ------- - rvs : array_like, shape (*size, *shape[:-1], dim) - """ - if self.shape == (): - return super().rvs(size=size) - size = np.atleast_1d(np.array(size, dtype=int)) - logA = np.broadcast_to(self.logA, self.shape).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) - p = np.exp(logA) - cump = np.cumsum(p, axis=-1) - u = np.random.rand(*size, *p.shape[:-1]) - i = np.argmax(np.array(u)[..., None] < cump, axis=-1) - mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) - mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) - x = np.random.randn(*size, *self.shape[:-1], self.dim) - if self.diagonal_cov: - L = np.sqrt(self.cov) - L = np.broadcast_to(L, (*self.shape, self.dim)) - L = np.choose(i[..., None], np.moveaxis(L, -2, 0)) - return mean + L * x - else: - L = cholesky(self.cov) - L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) - L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) - return mean + np.einsum("...ij,...j->...i", L, x) - - def condition(self, indices, values): - """Condition on indices with values. - - Parameters - ---------- - indices : array_like - Indices to condition over. - values : array_like shape (..., len(indices)) - Values to condition on. - - where self.shape[:-1] is broadcastable to ... - - Returns - ------- - conditioned distribution, shape (*shape, len(indices)) - """ - dist = super().condition(indices, np.expand_dims(values, -2)) - dist.__class__ = mixture_normal - marg = self.marginalise(self._bar(indices)) - dist.logA = marg.logpdf(values, broadcast=True, joint=True) - return dist - - def bijector(self, x, inverse=False): - """Bijector between U([0, 1])^d and the distribution. - - - x in [0, 1]^d is the hypercube space. - - theta in R^d is the physical space. - - Computes the transformation from x to theta or theta to x depending on - the value of inverse. 
- - Parameters - ---------- - x : array_like, shape (..., d) - if inverse: x is theta - else: x is x - inverse : bool, optional, default=False - If True: compute the inverse transformation from physical to - hypercube space. - - where self.shape[:-1] is broadcastable to ... - - Returns - ------- - transformed x or theta: array_like, shape (..., d) - """ - x = np.array(x) - theta = np.empty(np.broadcast_shapes(x.shape, self.shape[:-1] + (self.dim,))) - - if inverse: - theta[:] = x - x = np.empty(np.broadcast_shapes(x.shape, self.shape[:-1] + (self.dim,))) - - for i in range(self.dim): - dist = self.marginalise(np.s_[i + 1 :]).condition( - np.s_[:-1], theta[..., :i] - ) - m = np.atleast_1d(dist.mean)[..., 0] - if dist.diagonal_cov: - c = np.atleast_1d(dist.cov)[..., 0] - else: - c = np.atleast_2d(dist.cov)[..., 0, 0] - A = np.exp(dist.logA - logsumexp(dist.logA, axis=-1)[..., None]) - m = np.broadcast_to(m, dist.shape) - - def f(t): - return (A * 0.5 * (1 + erf((t[..., None] - m) / np.sqrt(2 * c)))).sum( - axis=-1 - ) - y - - if inverse: - y = 0 - x[..., i] = f(theta[..., i]) - else: - y = x[..., i] - a = (m - 10 * np.sqrt(c)).min(axis=-1) - b = (m + 10 * np.sqrt(c)).max(axis=-1) - theta[..., i] = bisect(f, a, b) - if inverse: - return x - else: - return theta - - def __getitem__(self, arg): # noqa: D105 - dist = super().__getitem__(arg) - dist.logA = np.broadcast_to(self.logA, self.shape)[arg] - return dist diff --git a/tests/test_model.py b/tests/test_model.py index 3e110d7..d3b937a 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,264 +1,729 @@ import numpy as np import pytest -from numpy.random import rand from numpy.testing import assert_allclose -from scipy.stats import invwishart, kstest -from lsbi.model import ( - LinearMixtureModel, + +def assert_allclose_broadcast(a, b, *args, **kwargs): + shape = np.broadcast_shapes(np.shape(a), np.shape(b)) + return assert_allclose( + np.broadcast_to(a, shape), np.broadcast_to(b, shape), *args, **kwargs + ) + + +from lsbi.model_1 import ( LinearModel, - MultiLinearModel, + MixtureModel, ReducedLinearModel, ReducedLinearModelUniformPrior, + _de_diagonalise, ) -N = 1000 - - -@pytest.mark.parametrize("n", [1, 2, 5, 10]) -@pytest.mark.parametrize("d", [1, 2, 5, 10]) +shapes = [(2, 3), (3,), ()] +dims = [1, 2, 4] + +tests = [] +for d in dims: + for n in dims: + for diagonal_Sigma in [True, False]: + for diagonal_C in [True, False]: + for diagonal_M in [True, False]: + for base_shape in shapes + ["scalar"]: + shape = base_shape + m_shape = base_shape + M_shape = base_shape + mu_shape = base_shape + C_shape = base_shape + Sigma_shape = base_shape + base_test = ( + d, + n, + shape, + m_shape, + M_shape, + mu_shape, + C_shape, + Sigma_shape, + diagonal_Sigma, + diagonal_C, + diagonal_M, + ) + for alt_shape in shapes + ["scalar"]: + for i in range(2, 8): + test = base_test[:i] + (alt_shape,) + base_test[i + 1 :] + if test[2] == "scalar": + continue + tests.append(test) + + +@pytest.mark.parametrize( + "d,n,shape,m_shape,M_shape,mu_shape,C_shape,Sigma_shape,diagonal_Sigma,diagonal_C,diagonal_M", + tests, +) class TestLinearModel(object): - cls = LinearModel - - def random(self, d, n): - M = rand(d, n) - m = rand(d) - C = invwishart(scale=np.eye(d), df=10 * d).rvs() - mu = rand(n) - Sigma = invwishart(scale=np.eye(n), df=10 * n).rvs() - return self.cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma) - - def _test_shape(self, model, d, n): - assert model.n == n + def random( + self, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + 
Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + if M_shape == "scalar": + M = np.random.randn() + elif diagonal_M: + M = np.random.randn(*M_shape, n) + else: + M = np.random.randn(*M_shape, d, n) + + if m_shape == "scalar": + m = np.random.randn() + else: + m = np.random.randn(*m_shape, d) + + if C_shape == "scalar": + C = np.random.randn() ** 2 + elif diagonal_C: + C = np.random.randn(*C_shape, d) ** 2 + else: + C = np.random.randn(*C_shape, d, d) + C = np.einsum("...ij,...kj->...ik", C, C) + d * np.eye(d) + + if mu_shape == "scalar": + mu = np.random.randn() + else: + mu = np.random.randn(*mu_shape, n) + + if Sigma_shape == "scalar": + Sigma = np.random.randn() ** 2 + elif diagonal_Sigma: + Sigma = np.random.randn(*Sigma_shape, n) ** 2 + else: + Sigma = np.random.randn(*Sigma_shape, n, n) + Sigma = np.einsum("...ij,...kj->...ik", Sigma, Sigma) + n * np.eye(n) + + model = LinearModel( + M, m, C, mu, Sigma, shape, n, d, diagonal_M, diagonal_C, diagonal_Sigma + ) assert model.d == d - assert model.M.shape == (d, n) - assert model.m.shape == (d,) - assert model.C.shape == (d, d) - assert model.mu.shape == (n,) - assert model.Sigma.shape == (n, n) - - def test_init_M(self, d, n): - self._test_shape(self.cls(M=rand()), 1, 1) - self._test_shape(self.cls(M=rand(), n=n), 1, n) - self._test_shape(self.cls(M=rand(), d=d), d, 1) - self._test_shape(self.cls(M=rand(), d=d, n=n), d, n) - self._test_shape(self.cls(M=rand(n)), 1, n) - self._test_shape(self.cls(M=rand(n), d=d), d, n) - self._test_shape(self.cls(M=rand(d, 1)), d, 1) - self._test_shape(self.cls(M=rand(d, 1), n=n), d, n) - self._test_shape(self.cls(M=rand(d, n)), d, n) - - M = rand() - model = self.cls(M=M, d=d, n=n) - assert_allclose(np.diag(model.M), M) - - M = rand(n) - model = self.cls(M=M, d=d) - assert_allclose(np.diag(model.M), M[: min(d, n)]) - - M = rand(d, n) - model = self.cls(M=M) - assert_allclose(model.M, M) - - def test_init_mu(self, d, n): - self._test_shape(self.cls(mu=rand(), d=d), d, 1) - self._test_shape(self.cls(mu=rand(), d=d, n=n), d, n) - self._test_shape(self.cls(mu=rand(n), d=d), d, n) - - mu = rand() - model = self.cls(mu=mu, d=d, n=n) - assert_allclose(model.mu, mu) - - mu = rand(n) - model = self.cls(mu=mu, d=d) - assert_allclose(model.mu, mu) - - def test_init_Sigma(self, d, n): - self._test_shape(self.cls(Sigma=rand(), d=d), d, 1) - self._test_shape(self.cls(Sigma=rand(), d=d, n=n), d, n) - self._test_shape(self.cls(Sigma=rand(n), d=d), d, n) - self._test_shape(self.cls(Sigma=rand(n, n), d=d), d, n) - - Sigma = rand() - model = self.cls(Sigma=Sigma, d=d, n=n) - assert_allclose(np.diag(model.Sigma), Sigma) - - Sigma = rand(n) - model = self.cls(Sigma=Sigma, d=d) - assert_allclose(np.diag(model.Sigma), Sigma) - - Sigma = rand(n, n) - model = self.cls(Sigma=Sigma, d=d) - assert_allclose(model.Sigma, Sigma) - - def test_init_m(self, d, n): - self._test_shape(self.cls(m=rand(), n=n), 1, n) - self._test_shape(self.cls(m=rand(), d=d, n=n), d, n) - self._test_shape(self.cls(m=rand(d), n=n), d, n) - - m = rand() - model = self.cls(m=m, d=d, n=n) - assert_allclose(model.m, m) - - m = rand(d) - model = self.cls(m=m, n=n) - assert_allclose(model.m, m) - - def test_init_C(self, d, n): - self._test_shape(self.cls(C=rand(), n=n), 1, n) - self._test_shape(self.cls(C=rand(), d=d, n=n), d, n) - self._test_shape(self.cls(C=rand(d), n=n), d, n) - self._test_shape(self.cls(C=rand(d, d), n=n), d, n) - - C = rand() - model = self.cls(C=C, d=d, n=n) - assert_allclose(np.diag(model.C), C) - - C = rand(d) - model = 
self.cls(C=C, n=n) - assert_allclose(np.diag(model.C), C) - - C = rand(d, d) - model = self.cls(C=C, n=n) - assert_allclose(model.C, C) - - def test_failure(self, d, n): - with pytest.raises(ValueError) as excinfo: - self.cls(m=rand(d)) - string = "Unable to determine number of parameters n" - assert string in str(excinfo.value) - - with pytest.raises(ValueError) as excinfo: - self.cls(mu=rand(n)) - string = "Unable to determine data dimensions d" - assert string in str(excinfo.value) - - def test_joint(self, d, n): - model = self.random(d, n) - prior = model.prior() - evidence = model.evidence() - joint = model.joint() - - samples_1 = prior.rvs(N) - samples_2 = joint.rvs(N)[:, -n:] - - if n == 1: - samples_1 = np.atleast_2d(samples_1).T - - for i in range(n): - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - assert p > 1e-5 - - p = kstest(prior.logpdf(samples_2), prior.logpdf(samples_1)).pvalue - assert p > 1e-5 - - samples_1 = evidence.rvs(N) - samples_2 = joint.rvs(N)[:, :d] - - if d == 1: - samples_1 = np.atleast_2d(samples_1).T - - for i in range(d): - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - assert p > 1e-5 - - p = kstest(evidence.logpdf(samples_2), evidence.logpdf(samples_1)).pvalue - assert p > 1e-5 + assert model.n == n + assert np.all(model.M == M) + assert np.all(model.m == m) + assert np.all(model.C == C) + assert np.all(model.mu == mu) + assert np.all(model.Sigma == Sigma) + assert model.diagonal_M == diagonal_M or ( + M_shape == "scalar" and model.diagonal_M + ) + assert model.diagonal_C == diagonal_C or ( + C_shape == "scalar" and model.diagonal_C + ) + assert model.diagonal_Sigma == diagonal_Sigma or ( + Sigma_shape == "scalar" and model.diagonal_Sigma + ) + return model + + @pytest.mark.parametrize("theta_shape", shapes) + def test_likelihood( + self, + theta_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + theta = np.random.randn(*theta_shape, n) + dist = model.likelihood(theta) + assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) + assert dist.dim == model.d + + def test_prior( + self, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.prior() + assert dist.shape == model.shape + assert dist.dim == model.n + + @pytest.mark.parametrize("D_shape", shapes) + def test_posterior( + self, + D_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + D = np.random.randn(*D_shape, d) + dist = model.posterior(D) + assert dist.shape == np.broadcast_shapes(model.shape, D_shape) + assert dist.dim == model.n + + def test_evidence( + self, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.evidence() + 
assert dist.shape == model.shape + assert dist.dim == model.d + + def test_joint( + self, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.joint() + assert dist.shape == model.shape + assert dist.dim == model.n + model.d + + def test_marginal_conditional( + self, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + atol = 1e-5 - def test_likelihood_posterior(self, d, n): - model = self.random(d, n) - joint = model.joint() + i = np.arange(d + n)[-n:] + model_1 = model.evidence() + model_2 = model.joint().marginalise(i) + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov, + atol=atol, + ) - samples = [] - model.prior() theta = model.prior().rvs() - for _ in range(N): - data = np.atleast_1d(model.likelihood(theta).rvs()) - theta = np.atleast_1d(model.posterior(data).rvs()) - samples.append(np.concatenate([data, theta])[:]) - samples_1 = np.array(samples)[::100] - samples_2 = joint.rvs(len(samples_1)) - - for i in range(n + d): - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - assert p > 1e-5 + model_1 = model.likelihood(theta) + model_2 = model.joint().condition(i, theta) + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov, + atol=atol, + ) - p = kstest(joint.logpdf(samples_2), joint.logpdf(samples_1)).pvalue - assert p > 1e-5 + i = np.arange(d + n)[:d] + model_1 = model.prior() + model_2 = model.joint().marginalise(i) + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov, + atol=atol, + ) - def test_DKL(self, d, n): - model = self.random(d, n) + D = model.evidence().rvs() + model_1 = model.posterior(D) + model_2 = model.joint().condition(i, D) + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov, + atol=atol, + ) - data = model.evidence().rvs() - posterior = model.posterior(data) - prior = model.prior() + def test_bayes_theorem( + self, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + theta = model.prior().rvs() + D = model.evidence().rvs() + assert_allclose( + model.posterior(D).logpdf(theta, broadcast=True) + + model.evidence().logpdf(D, broadcast=True), + model.likelihood(theta).logpdf(D, broadcast=True) + + model.prior().logpdf(theta, broadcast=True), + ) - samples = posterior.rvs(N) - Info = posterior.logpdf(samples) - prior.logpdf(samples) - assert_allclose(Info.mean(), model.DKL(data), atol=5 * Info.std() / np.sqrt(N)) - def test_from_joint(self, d, n): - model = self.random(d, n) - joint = 
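# Sketch of the Bayes-theorem identity checked above,
# P(theta|D) P(D) = P(D|theta) P(theta), using the lsbi.model_1 LinearModel as
# imported in this test file; the constructor call follows the positional order
# used in random() above and the sizes are illustrative.
import numpy as np
from lsbi.model_1 import LinearModel

d, n = 3, 2
M = np.random.randn(d, n)
model = LinearModel(M, 0, np.eye(d), 0, np.eye(n), (), n, d, False, False, False)

theta = model.prior().rvs()
D = model.likelihood(theta).rvs()
lhs = model.posterior(D).logpdf(theta) + model.evidence().logpdf(D)
rhs = model.likelihood(theta).logpdf(D) + model.prior().logpdf(theta)
np.testing.assert_allclose(lhs, rhs)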
model.joint() - mean = joint.mean - cov = joint.cov - model2 = self.cls.from_joint(mean, cov, n) - assert model2.n == model.n - assert model2.d == model.d - assert_allclose(model2.M, model.M) - assert_allclose(model2.m, model.m) - assert_allclose(model2.C, model.C) - assert_allclose(model2.mu, model.mu) - assert_allclose(model2.Sigma, model.Sigma) +@pytest.mark.parametrize("logA_shape", shapes) +class TestMixtureModel(TestLinearModel): + def random( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = super().random( + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + logA = np.random.randn(*logA_shape) + model = MixtureModel( + logA, + model.M, + model.m, + model.C, + model.mu, + model.Sigma, + shape, + n, + d, + diagonal_M, + diagonal_C, + diagonal_Sigma, + ) + assert np.all(model.logA == logA) + return model + + @pytest.mark.parametrize("theta_shape", shapes) + def test_likelihood( + self, + theta_shape, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + theta = np.random.randn(*theta_shape[:-1], n) + dist = model.likelihood(theta) + if model.shape != (): + assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) + assert dist.dim == model.d + + def test_prior( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.prior() + assert dist.shape == model.shape + assert dist.dim == model.n + + @pytest.mark.parametrize("D_shape", shapes) + def test_posterior( + self, + D_shape, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + D = np.random.randn(*D_shape[:-1], d) + dist = model.posterior(D) + if model.shape != (): + assert dist.shape == np.broadcast_shapes(model.shape, D_shape) + assert dist.dim == model.n + + def test_evidence( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.evidence() + assert dist.shape == model.shape + assert dist.dim == model.d + + def test_joint( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) + dist = model.joint() + assert dist.shape == model.shape + assert dist.dim == model.n + model.d + + def test_marginal_conditional( + self, + logA_shape, 
+ M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) - def test_reduce(self, d, n): - if n > d: - pytest.skip("n > d") - model = self.cls(M=rand(d, n)) - data = model.evidence().rvs() - reduced_model = model.reduce(data) - assert isinstance(reduced_model, ReducedLinearModel) - reduced_model.prior().mean - assert_allclose(reduced_model.prior().mean, model.prior().mean) - assert_allclose(reduced_model.prior().cov, model.prior().cov) - assert_allclose(reduced_model.posterior().mean, model.posterior(data).mean) - assert_allclose(reduced_model.posterior().cov, model.posterior(data).cov) - assert_allclose(model.evidence().logpdf(data), reduced_model.logZ()) - assert_allclose(model.DKL(data), reduced_model.DKL()) + atol = 1e-5 - def test_marginal_conditional(self, d, n): - model = self.random(d, n) i = np.arange(d + n)[-n:] model_1 = model.evidence() model_2 = model.joint().marginalise(i) - assert_allclose(model_1.mean, model_2.mean) - assert_allclose(model_1.cov, model_2.cov, rtol=1e-6, atol=1e-6) + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov, + atol=atol, + ) theta = model.prior().rvs() model_1 = model.likelihood(theta) model_2 = model.joint().condition(i, theta) - assert_allclose(model_1.mean, model_2.mean) - assert_allclose(model_1.cov, model_2.cov, rtol=1e-6, atol=1e-6) + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov, + atol=atol, + ) i = np.arange(d + n)[:d] model_1 = model.prior() model_2 = model.joint().marginalise(i) - assert_allclose(model_1.mean, model_2.mean) - assert_allclose(model_1.cov, model_2.cov, rtol=1e-6, atol=1e-6) + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov, + atol=atol, + ) D = model.evidence().rvs() model_1 = model.posterior(D) model_2 = model.joint().condition(i, D) - assert_allclose(model_1.mean, model_2.mean) - assert_allclose(model_1.cov, model_2.cov, rtol=1e-6, atol=1e-6) + assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) + assert_allclose_broadcast( + _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + model_2.cov, + atol=atol, + ) - def test_bayes_theorem(self, d, n): - model = self.random(d, n) + def test_bayes_theorem( + self, + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ): + model = self.random( + logA_shape, + M_shape, + diagonal_M, + m_shape, + C_shape, + diagonal_C, + mu_shape, + Sigma_shape, + diagonal_Sigma, + shape, + n, + d, + ) theta = model.prior().rvs() D = model.evidence().rvs() assert_allclose( - model.posterior(D).logpdf(theta) + model.evidence().logpdf(D), - model.likelihood(theta).logpdf(D) + model.prior().logpdf(theta), + model.posterior(D).logpdf(theta, broadcast=True) + + model.evidence().logpdf(D, broadcast=True), + model.likelihood(theta).logpdf(D, broadcast=True) + + model.prior().logpdf(theta, broadcast=True), ) @@ -325,792 +790,3 @@ def test_bayes_theorem(self, n): assert_allclose( 
model.logP(theta) + model.logZ(), model.logL(theta) + model.logpi(theta) ) - - -@pytest.mark.parametrize("k", np.arange(1, 6)) -@pytest.mark.parametrize("d", np.arange(1, 6)) -@pytest.mark.parametrize("n", np.arange(1, 6)) -class TestLinearMixtureModel(object): - cls = LinearMixtureModel - - def random(self, k, d, n): - M = rand(k, d, n) - m = rand(k, d) - C = np.array( - [ - np.atleast_2d(invwishart(scale=np.eye(d), df=d * 10).rvs()) - for _ in range(k) - ] - ) - - mu = rand(k, n) - Sigma = np.array( - [ - np.atleast_2d(invwishart(scale=np.eye(n), df=d * 10).rvs()) - for _ in range(k) - ] - ) - logA = np.log(rand(k)) - return self.cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma, logA=logA) - - def _test_shape(self, model, k, d, n): - assert model.n == n - assert model.d == d - assert model.k == k - assert model.M.shape == (k, d, n) - assert model.m.shape == ( - k, - d, - ) - assert model.C.shape == (k, d, d) - assert model.mu.shape == ( - k, - n, - ) - assert model.Sigma.shape == (k, n, n) - assert model.logA.shape == (k,) - - def test_init_M(self, k, d, n): - self._test_shape(self.cls(M=rand()), 1, 1, 1) - self._test_shape(self.cls(M=rand(), n=n), 1, 1, n) - self._test_shape(self.cls(M=rand(), d=d), 1, d, 1) - self._test_shape(self.cls(M=rand(), k=k), k, 1, 1) - self._test_shape(self.cls(M=rand(), d=d, n=n), 1, d, n) - self._test_shape(self.cls(M=rand(), k=k, n=n), k, 1, n) - self._test_shape(self.cls(M=rand(), k=k, d=d), k, d, 1) - self._test_shape(self.cls(M=rand(), k=k, d=d, n=n), k, d, n) - self._test_shape(self.cls(M=rand(n)), 1, 1, n) - self._test_shape(self.cls(M=rand(n), d=d), 1, d, n) - self._test_shape(self.cls(M=rand(n), k=k), k, 1, n) - self._test_shape(self.cls(M=rand(n), k=k, d=d), k, d, n) - - self._test_shape(self.cls(M=rand(d, 1)), 1, d, 1) - self._test_shape(self.cls(M=rand(d, 1), k=k), k, d, 1) - self._test_shape(self.cls(M=rand(d, 1), n=n), 1, d, n) - self._test_shape(self.cls(M=rand(d, 1), k=k, n=n), k, d, n) - - self._test_shape(self.cls(M=rand(k, 1, 1)), k, 1, 1) - self._test_shape(self.cls(M=rand(k, 1, 1), d=d), k, d, 1) - self._test_shape(self.cls(M=rand(k, 1, 1), n=n), k, 1, n) - self._test_shape(self.cls(M=rand(k, 1, 1), d=d, n=n), k, d, n) - - self._test_shape(self.cls(M=rand(k, d, 1)), k, d, 1) - self._test_shape(self.cls(M=rand(k, d, 1), n=n), k, d, n) - - self._test_shape(self.cls(M=rand(k, 1, n)), k, 1, n) - self._test_shape(self.cls(M=rand(k, 1, n), d=d), k, d, n) - - self._test_shape(self.cls(M=rand(1, d, n)), 1, d, n) - self._test_shape(self.cls(M=rand(1, d, n), k=k), k, d, n) - - self._test_shape(self.cls(M=rand(d, n)), 1, d, n) - self._test_shape(self.cls(M=rand(d, n), k=k), k, d, n) - - self._test_shape(self.cls(M=rand(k, d, n)), k, d, n) - - M = rand() - model = self.cls(M=M, d=d, n=n) - assert_allclose(np.diag(model.M[0]), M) - - M = rand(n) - model = self.cls(M=M, d=d) - assert_allclose(np.diag(model.M[0]), M[: min(d, n)]) - - M = rand(d, n) - model = self.cls(M=M) - assert_allclose(model.M[0], M) - - M = rand() - model = self.cls(M=M, k=k, d=d, n=n) - for M_ in model.M: - assert_allclose(M_, model.M[0]) - assert_allclose(np.diag(M_), M) - - M = rand(n) - model = self.cls(M=M, d=d) - for M_ in model.M: - assert_allclose(M_, model.M[0]) - assert_allclose(np.diag(M_), M[: min(d, n)]) - - M = rand(d, n) - model = self.cls(M=M) - for M_ in model.M: - assert_allclose(M_, model.M[0]) - assert_allclose(M_, M) - - M = rand(k, d, n) - model = self.cls(M=M) - assert_allclose(model.M, M) - - def test_init_mu(self, k, d, n): - self._test_shape(self.cls(mu=rand(), d=d), 
1, d, 1) - self._test_shape(self.cls(mu=rand(), k=k, d=d), k, d, 1) - self._test_shape(self.cls(mu=rand(), d=d, n=n), 1, d, n) - self._test_shape(self.cls(mu=rand(), k=k, d=d, n=n), k, d, n) - self._test_shape(self.cls(mu=rand(n), d=d), 1, d, n) - self._test_shape(self.cls(mu=rand(n), k=k, d=d), k, d, n) - self._test_shape(self.cls(mu=rand(k, n), d=d), k, d, n) - - mu = rand() - model = self.cls(mu=mu, d=d, n=n) - assert_allclose(model.mu, mu) - - mu = rand(n) - model = self.cls(mu=mu, d=d) - assert_allclose(model.mu[0], mu) - - mu = rand() - model = self.cls(mu=mu, k=k, d=d, n=n) - assert_allclose(model.mu, mu) - - mu = rand(n) - model = self.cls(mu=mu, k=k, d=d) - for mu_ in model.mu: - assert_allclose(mu_, mu) - - mu = rand(k, n) - model = self.cls(mu=mu, d=d) - assert_allclose(model.mu, mu) - - def test_init_Sigma(self, k, d, n): - self._test_shape(self.cls(Sigma=rand(), d=d), 1, d, 1) - self._test_shape(self.cls(Sigma=rand(), k=k, d=d), k, d, 1) - self._test_shape(self.cls(Sigma=rand(), d=d, n=n), 1, d, n) - self._test_shape(self.cls(Sigma=rand(), k=k, d=d, n=n), k, d, n) - self._test_shape(self.cls(Sigma=rand(n), d=d), 1, d, n) - self._test_shape(self.cls(Sigma=rand(n), k=k, d=d), k, d, n) - self._test_shape(self.cls(Sigma=rand(n, n), d=d), 1, d, n) - self._test_shape(self.cls(Sigma=rand(n, n), k=k, d=d), k, d, n) - self._test_shape(self.cls(Sigma=rand(k, n, n), d=d), k, d, n) - - Sigma = rand() - model = self.cls(Sigma=Sigma, d=d, n=n) - assert_allclose(np.diag(model.Sigma[0]), Sigma) - - Sigma = rand(n) - model = self.cls(Sigma=Sigma, d=d) - assert_allclose(np.diag(model.Sigma[0]), Sigma) - - Sigma = rand(n, n) - model = self.cls(Sigma=Sigma, d=d) - assert_allclose(model.Sigma[0], Sigma) - - Sigma = rand() - model = self.cls(Sigma=Sigma, k=k, d=d, n=n) - for Sigma_ in model.Sigma: - assert_allclose(np.diag(Sigma_), Sigma) - - Sigma = rand(n) - model = self.cls(Sigma=Sigma, k=k, d=d) - for Sigma_ in model.Sigma: - assert_allclose(np.diag(Sigma_), Sigma) - - Sigma = rand(n, n) - model = self.cls(Sigma=Sigma, k=k, d=d) - for Sigma_ in model.Sigma: - assert_allclose(Sigma_, Sigma) - - Sigma = rand(k, n, n) - model = self.cls(Sigma=Sigma, d=d) - assert_allclose(model.Sigma, Sigma) - - def test_init_m(self, k, d, n): - self._test_shape(self.cls(m=rand(), n=n), 1, 1, n) - self._test_shape(self.cls(m=rand(), k=k, n=n), k, 1, n) - self._test_shape(self.cls(m=rand(), n=n, d=d), 1, d, n) - self._test_shape(self.cls(m=rand(), k=k, n=n, d=d), k, d, n) - self._test_shape(self.cls(m=rand(d), n=n), 1, d, n) - self._test_shape(self.cls(m=rand(d), k=k, n=n), k, d, n) - self._test_shape(self.cls(m=rand(k, d), n=n), k, d, n) - - m = rand() - model = self.cls(m=m, d=d, n=n) - assert_allclose(model.m, m) - - m = rand(d) - model = self.cls(m=m, n=n) - assert_allclose(model.m[0], m) - - m = rand() - model = self.cls(m=m, k=k, d=d, n=n) - assert_allclose(model.m, m) - - m = rand(d) - model = self.cls(m=m, k=k, n=n) - for mu_ in model.m: - assert_allclose(mu_, m) - - m = rand(k, d) - model = self.cls(m=m, n=n) - assert_allclose(model.m, m) - - def test_init_C(self, k, d, n): - self._test_shape(self.cls(C=rand(), n=n), 1, 1, n) - self._test_shape(self.cls(C=rand(), k=k, n=n), k, 1, n) - self._test_shape(self.cls(C=rand(), n=n, d=d), 1, d, n) - self._test_shape(self.cls(C=rand(), k=k, n=n, d=d), k, d, n) - self._test_shape(self.cls(C=rand(d), n=n), 1, d, n) - self._test_shape(self.cls(C=rand(d), k=k, n=n), k, d, n) - self._test_shape(self.cls(C=rand(d, d), n=n), 1, d, n) - self._test_shape(self.cls(C=rand(d, 
d), k=k, n=n), k, d, n) - self._test_shape(self.cls(C=rand(k, d, d), n=n), k, d, n) - - C = rand() - model = self.cls(C=C, d=d, n=n) - assert_allclose(np.diag(model.C[0]), C) - - C = rand(d) - model = self.cls(C=C, n=n) - assert_allclose(np.diag(model.C[0]), C) - - C = rand(d, d) - model = self.cls(C=C, n=n) - assert_allclose(model.C[0], C) - - C = rand() - model = self.cls(C=C, k=k, d=d, n=n) - for Sigma_ in model.C: - assert_allclose(np.diag(Sigma_), C) - - C = rand(d) - model = self.cls(C=C, k=k, n=n) - for Sigma_ in model.C: - assert_allclose(np.diag(Sigma_), C) - - C = rand(d, d) - model = self.cls(C=C, k=k, n=n) - for Sigma_ in model.C: - assert_allclose(Sigma_, C) - - C = rand(k, d, d) - model = self.cls(C=C, n=n) - assert_allclose(model.C, C) - - def test_init_logA(self, k, d, n): - self._test_shape(self.cls(logA=rand(), d=d, n=n), 1, d, n) - self._test_shape(self.cls(logA=rand(), k=k, d=d, n=n), k, d, n) - self._test_shape(self.cls(logA=rand(k), d=d, n=n), k, d, n) - - logA = rand() - model = self.cls(logA=logA, d=d, n=n) - assert_allclose(model.logA, logA) - - logA = rand(k) - model = self.cls(logA=logA, d=d, n=n) - assert_allclose(model.logA, logA) - - def test_failure(self, k, d, n): - with pytest.raises(ValueError) as excinfo: - self.cls(m=rand(d)) - string = "Unable to determine number of parameters n" - assert string in str(excinfo.value) - - with pytest.raises(ValueError) as excinfo: - self.cls(mu=rand(n)) - string = "Unable to determine data dimensions d" - assert string in str(excinfo.value) - - def test_joint(self, k, d, n): - model = self.random(k, d, n) - prior = model.prior() - evidence = model.evidence() - joint = model.joint() - - samples_1 = prior.rvs(N) - samples_2 = joint.rvs(N)[:, -n:] - - if n == 1: - samples_1 = np.atleast_2d(samples_1).T - - for i in range(n): - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - assert p > 1e-5 - - p = kstest(prior.logpdf(samples_2), prior.logpdf(samples_1)).pvalue - assert p > 1e-5 - - samples_1 = evidence.rvs(N) - samples_2 = joint.rvs(N)[:, :d] - - if d == 1: - samples_1 = np.atleast_2d(samples_1).T - - for i in range(d): - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - assert p > 1e-5 - - p = kstest(evidence.logpdf(samples_2), evidence.logpdf(samples_1)).pvalue - assert p > 1e-5 - - def test_likelihood_posterior(self, k, d, n): - model = self.random(k, d, n) - joint = model.joint() - - samples = [] - model.prior() - theta = model.prior().rvs() - for _ in range(N): - data = np.atleast_1d(model.likelihood(theta).rvs()) - theta = np.atleast_1d(model.posterior(data).rvs()) - samples.append(np.concatenate([data, theta])[:]) - samples_1 = np.array(samples)[::100] - samples_2 = joint.rvs(len(samples_1)) - - for i in range(n + d): - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - assert p > 1e-5 - - p = kstest(joint.logpdf(samples_2), joint.logpdf(samples_1)).pvalue - assert p > 1e-5 - - def test_from_joint(self, k, d, n): - model = self.random(k, d, n) - joint = model.joint() - means = joint.means - covs = joint.covs - logA = joint.logA - model2 = self.cls.from_joint(means, covs, logA, n) - assert model2.n == model.n - assert model2.d == model.d - assert_allclose(model2.M, model.M) - assert_allclose(model2.m, model.m) - assert_allclose(model2.C, model.C) - assert_allclose(model2.mu, model.mu) - assert_allclose(model2.Sigma, model.Sigma) - assert_allclose(model2.logA, model.logA) - - def test_marginal_conditional(self, k, d, n): - model = self.random(k, d, n) - i = np.arange(d + n)[-n:] - model_1 = 
model.evidence() - model_2 = model.joint().marginalise(i) - assert_allclose(model_1.means, model_2.means) - assert_allclose(model_1.covs, model_2.covs, rtol=1e-6, atol=1e-6) - assert_allclose(model_1.logA, model_2.logA) - - theta = model.prior().rvs() - model_1 = model.likelihood(theta) - model_2 = model.joint().condition(i, theta) - assert_allclose(model_1.means, model_2.means) - assert_allclose(model_1.covs, model_2.covs, rtol=1e-6, atol=1e-6) - assert_allclose(model_1.logA, model_2.logA) - - i = np.arange(d + n)[:d] - model_1 = model.prior() - model_2 = model.joint().marginalise(i) - assert_allclose(model_1.means, model_2.means) - assert_allclose(model_1.covs, model_2.covs, rtol=1e-6, atol=1e-6) - assert_allclose(model_1.logA, model_2.logA) - - D = model.evidence().rvs() - model_1 = model.posterior(D) - model_2 = model.joint().condition(i, D) - assert_allclose(model_1.means, model_2.means) - assert_allclose(model_1.covs, model_2.covs, rtol=1e-6, atol=1e-6) - assert_allclose(model_1.logA, model_2.logA) - - def test_bayes_theorem(self, k, d, n): - model = self.random(k, d, n) - theta = model.prior().rvs() - D = model.evidence().rvs() - assert_allclose( - model.posterior(D).logpdf(theta) + model.evidence().logpdf(D), - model.likelihood(theta).logpdf(D) + model.prior().logpdf(theta), - ) - - -@pytest.mark.parametrize("k", np.arange(1, 6)) -@pytest.mark.parametrize("d", np.arange(1, 6)) -@pytest.mark.parametrize("n", np.arange(1, 6)) -class TestMultiLinearModel(object): - cls = MultiLinearModel - - def random(self, k, d, n): - M = rand(k, d, n) - m = rand(k, d) - C = np.array( - [ - np.atleast_2d(invwishart(scale=np.eye(d), df=d * 10).rvs()) - for _ in range(k) - ] - ) - - mu = rand(k, n) - Sigma = np.array( - [ - np.atleast_2d(invwishart(scale=np.eye(n), df=d * 10).rvs()) - for _ in range(k) - ] - ) - return self.cls(M=M, m=m, C=C, mu=mu, Sigma=Sigma) - - def _test_shape(self, model, k, d, n): - assert model.n == n - assert model.d == d - assert model.k == k - assert model.M.shape == (k, d, n) - assert model.m.shape == ( - k, - d, - ) - assert model.C.shape == (k, d, d) - assert model.mu.shape == ( - k, - n, - ) - assert model.Sigma.shape == (k, n, n) - - def test_init_M(self, k, d, n): - self._test_shape(self.cls(M=rand()), 1, 1, 1) - self._test_shape(self.cls(M=rand(), n=n), 1, 1, n) - self._test_shape(self.cls(M=rand(), d=d), 1, d, 1) - self._test_shape(self.cls(M=rand(), k=k), k, 1, 1) - self._test_shape(self.cls(M=rand(), d=d, n=n), 1, d, n) - self._test_shape(self.cls(M=rand(), k=k, n=n), k, 1, n) - self._test_shape(self.cls(M=rand(), k=k, d=d), k, d, 1) - self._test_shape(self.cls(M=rand(), k=k, d=d, n=n), k, d, n) - self._test_shape(self.cls(M=rand(n)), 1, 1, n) - self._test_shape(self.cls(M=rand(n), d=d), 1, d, n) - self._test_shape(self.cls(M=rand(n), k=k), k, 1, n) - self._test_shape(self.cls(M=rand(n), k=k, d=d), k, d, n) - - self._test_shape(self.cls(M=rand(d, 1)), 1, d, 1) - self._test_shape(self.cls(M=rand(d, 1), k=k), k, d, 1) - self._test_shape(self.cls(M=rand(d, 1), n=n), 1, d, n) - self._test_shape(self.cls(M=rand(d, 1), k=k, n=n), k, d, n) - - self._test_shape(self.cls(M=rand(k, 1, 1)), k, 1, 1) - self._test_shape(self.cls(M=rand(k, 1, 1), d=d), k, d, 1) - self._test_shape(self.cls(M=rand(k, 1, 1), n=n), k, 1, n) - self._test_shape(self.cls(M=rand(k, 1, 1), d=d, n=n), k, d, n) - - self._test_shape(self.cls(M=rand(k, d, 1)), k, d, 1) - self._test_shape(self.cls(M=rand(k, d, 1), n=n), k, d, n) - - self._test_shape(self.cls(M=rand(k, 1, n)), k, 1, n) - 
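The `test_init_*` checks in these classes pin down a promotion rule for the model parameters: a scalar `M` becomes a d-by-n matrix with that scalar on the diagonal, a length-n vector is placed on the diagonal (truncated to `min(d, n)`), and a leading `k` axis is broadcast in, which is exactly what the `assert_allclose(np.diag(model.M[0]), M)` assertions verify. A minimal numpy sketch of that promotion, standalone and not the library's implementation (the helper name `promote_M` is made up for illustration):

import numpy as np

def promote_M(M, k=1, d=1, n=1):
    """Illustrative promotion of M to a (k, d, n) stack (not the library code)."""
    M = np.atleast_1d(np.asarray(M, dtype=float))
    if M.ndim == 1:
        # scalar or length-n vector: place the value(s) on the main diagonal
        diag = np.zeros(min(d, n)) + M[: min(d, n)]
        out = np.zeros((d, n))
        out[np.arange(min(d, n)), np.arange(min(d, n))] = diag
        M = out
    return np.broadcast_to(M, (k,) + M.shape[-2:])

M = promote_M(0.5, k=2, d=3, n=4)
assert M.shape == (2, 3, 4) and np.allclose(np.diag(M[0]), 0.5)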
self._test_shape(self.cls(M=rand(k, 1, n), d=d), k, d, n) - - self._test_shape(self.cls(M=rand(1, d, n)), 1, d, n) - self._test_shape(self.cls(M=rand(1, d, n), k=k), k, d, n) - - self._test_shape(self.cls(M=rand(d, n)), 1, d, n) - self._test_shape(self.cls(M=rand(d, n), k=k), k, d, n) - - self._test_shape(self.cls(M=rand(k, d, n)), k, d, n) - - M = rand() - model = self.cls(M=M, d=d, n=n) - assert_allclose(np.diag(model.M[0]), M) - - M = rand(n) - model = self.cls(M=M, d=d) - assert_allclose(np.diag(model.M[0]), M[: min(d, n)]) - - M = rand(d, n) - model = self.cls(M=M) - assert_allclose(model.M[0], M) - - M = rand() - model = self.cls(M=M, k=k, d=d, n=n) - for M_ in model.M: - assert_allclose(M_, model.M[0]) - assert_allclose(np.diag(M_), M) - - M = rand(n) - model = self.cls(M=M, d=d) - for M_ in model.M: - assert_allclose(M_, model.M[0]) - assert_allclose(np.diag(M_), M[: min(d, n)]) - - M = rand(d, n) - model = self.cls(M=M) - for M_ in model.M: - assert_allclose(M_, model.M[0]) - assert_allclose(M_, M) - - M = rand(k, d, n) - model = self.cls(M=M) - assert_allclose(model.M, M) - - def test_init_mu(self, k, d, n): - self._test_shape(self.cls(mu=rand(), d=d), 1, d, 1) - self._test_shape(self.cls(mu=rand(), k=k, d=d), k, d, 1) - self._test_shape(self.cls(mu=rand(), d=d, n=n), 1, d, n) - self._test_shape(self.cls(mu=rand(), k=k, d=d, n=n), k, d, n) - self._test_shape(self.cls(mu=rand(n), d=d), 1, d, n) - self._test_shape(self.cls(mu=rand(n), k=k, d=d), k, d, n) - self._test_shape(self.cls(mu=rand(k, n), d=d), k, d, n) - - mu = rand() - model = self.cls(mu=mu, d=d, n=n) - assert_allclose(model.mu, mu) - - mu = rand(n) - model = self.cls(mu=mu, d=d) - assert_allclose(model.mu[0], mu) - - mu = rand() - model = self.cls(mu=mu, k=k, d=d, n=n) - assert_allclose(model.mu, mu) - - mu = rand(n) - model = self.cls(mu=mu, k=k, d=d) - for mu_ in model.mu: - assert_allclose(mu_, mu) - - mu = rand(k, n) - model = self.cls(mu=mu, d=d) - assert_allclose(model.mu, mu) - - def test_init_Sigma(self, k, d, n): - self._test_shape(self.cls(Sigma=rand(), d=d), 1, d, 1) - self._test_shape(self.cls(Sigma=rand(), k=k, d=d), k, d, 1) - self._test_shape(self.cls(Sigma=rand(), d=d, n=n), 1, d, n) - self._test_shape(self.cls(Sigma=rand(), k=k, d=d, n=n), k, d, n) - self._test_shape(self.cls(Sigma=rand(n), d=d), 1, d, n) - self._test_shape(self.cls(Sigma=rand(n), k=k, d=d), k, d, n) - self._test_shape(self.cls(Sigma=rand(n, n), d=d), 1, d, n) - self._test_shape(self.cls(Sigma=rand(n, n), k=k, d=d), k, d, n) - self._test_shape(self.cls(Sigma=rand(k, n, n), d=d), k, d, n) - - Sigma = rand() - model = self.cls(Sigma=Sigma, d=d, n=n) - assert_allclose(np.diag(model.Sigma[0]), Sigma) - - Sigma = rand(n) - model = self.cls(Sigma=Sigma, d=d) - assert_allclose(np.diag(model.Sigma[0]), Sigma) - - Sigma = rand(n, n) - model = self.cls(Sigma=Sigma, d=d) - assert_allclose(model.Sigma[0], Sigma) - - Sigma = rand() - model = self.cls(Sigma=Sigma, k=k, d=d, n=n) - for Sigma_ in model.Sigma: - assert_allclose(np.diag(Sigma_), Sigma) - - Sigma = rand(n) - model = self.cls(Sigma=Sigma, k=k, d=d) - for Sigma_ in model.Sigma: - assert_allclose(np.diag(Sigma_), Sigma) - - Sigma = rand(n, n) - model = self.cls(Sigma=Sigma, k=k, d=d) - for Sigma_ in model.Sigma: - assert_allclose(Sigma_, Sigma) - - Sigma = rand(k, n, n) - model = self.cls(Sigma=Sigma, d=d) - assert_allclose(model.Sigma, Sigma) - - def test_init_m(self, k, d, n): - self._test_shape(self.cls(m=rand(), n=n), 1, 1, n) - self._test_shape(self.cls(m=rand(), k=k, n=n), k, 1, n) - 
self._test_shape(self.cls(m=rand(), n=n, d=d), 1, d, n) - self._test_shape(self.cls(m=rand(), k=k, n=n, d=d), k, d, n) - self._test_shape(self.cls(m=rand(d), n=n), 1, d, n) - self._test_shape(self.cls(m=rand(d), k=k, n=n), k, d, n) - self._test_shape(self.cls(m=rand(k, d), n=n), k, d, n) - - m = rand() - model = self.cls(m=m, d=d, n=n) - assert_allclose(model.m, m) - - m = rand(d) - model = self.cls(m=m, n=n) - assert_allclose(model.m[0], m) - - m = rand() - model = self.cls(m=m, k=k, d=d, n=n) - assert_allclose(model.m, m) - - m = rand(d) - model = self.cls(m=m, k=k, n=n) - for mu_ in model.m: - assert_allclose(mu_, m) - - m = rand(k, d) - model = self.cls(m=m, n=n) - assert_allclose(model.m, m) - - def test_init_C(self, k, d, n): - self._test_shape(self.cls(C=rand(), n=n), 1, 1, n) - self._test_shape(self.cls(C=rand(), k=k, n=n), k, 1, n) - self._test_shape(self.cls(C=rand(), n=n, d=d), 1, d, n) - self._test_shape(self.cls(C=rand(), k=k, n=n, d=d), k, d, n) - self._test_shape(self.cls(C=rand(d), n=n), 1, d, n) - self._test_shape(self.cls(C=rand(d), k=k, n=n), k, d, n) - self._test_shape(self.cls(C=rand(d, d), n=n), 1, d, n) - self._test_shape(self.cls(C=rand(d, d), k=k, n=n), k, d, n) - self._test_shape(self.cls(C=rand(k, d, d), n=n), k, d, n) - - C = rand() - model = self.cls(C=C, d=d, n=n) - assert_allclose(np.diag(model.C[0]), C) - - C = rand(d) - model = self.cls(C=C, n=n) - assert_allclose(np.diag(model.C[0]), C) - - C = rand(d, d) - model = self.cls(C=C, n=n) - assert_allclose(model.C[0], C) - - C = rand() - model = self.cls(C=C, k=k, d=d, n=n) - for Sigma_ in model.C: - assert_allclose(np.diag(Sigma_), C) - - C = rand(d) - model = self.cls(C=C, k=k, n=n) - for Sigma_ in model.C: - assert_allclose(np.diag(Sigma_), C) - - C = rand(d, d) - model = self.cls(C=C, k=k, n=n) - for Sigma_ in model.C: - assert_allclose(Sigma_, C) - - C = rand(k, d, d) - model = self.cls(C=C, n=n) - assert_allclose(model.C, C) - - def test_failure(self, k, d, n): - with pytest.raises(ValueError) as excinfo: - self.cls(m=rand(d)) - string = "Unable to determine number of parameters n" - assert string in str(excinfo.value) - - with pytest.raises(ValueError) as excinfo: - self.cls(mu=rand(n)) - string = "Unable to determine data dimensions d" - assert string in str(excinfo.value) - - def test_joint(self, k, d, n): - model = self.random(k, d, n) - prior = model.prior() - evidence = model.evidence() - joint = model.joint() - - samples_1 = prior.rvs(N) - samples_2 = joint.rvs(N)[..., -n:] - samples_1.shape - - if n == 1: - samples_1 = samples_1[..., None] - - for j in range(k): - for i in range(n): - if k == 1: - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - else: - p = kstest(samples_1[:, j, i], samples_2[:, j, i]).pvalue - assert p > 1e-5 - - if k == 1: - p = kstest(prior.logpdf(samples_2), prior.logpdf(samples_1)).pvalue - else: - p = kstest( - prior.logpdf(samples_2)[:, j], prior.logpdf(samples_1)[:, j] - ).pvalue - assert p > 1e-5 - - samples_1 = evidence.rvs(N) - samples_2 = joint.rvs(N)[..., :d] - - if d == 1: - samples_1 = samples_1[..., None] - - for j in range(k): - for i in range(d): - if k == 1: - p = kstest(samples_1[:, i], samples_2[:, i]).pvalue - else: - p = kstest(samples_1[:, j, i], samples_2[:, j, i]).pvalue - assert p > 1e-5 - - if k == 1: - p = kstest( - evidence.logpdf(samples_2), evidence.logpdf(samples_1) - ).pvalue - else: - p = kstest( - evidence.logpdf(samples_2)[:, j], evidence.logpdf(samples_1)[:, j] - ).pvalue - assert p > 1e-5 - - def test_likelihood_posterior(self, k, d, 
n): - model = self.random(k, d, n) - joint = model.joint() - - samples = [] - theta = model.prior().rvs() - for _ in range(N): - data = np.atleast_1d(model.likelihood(theta).rvs()).reshape(k, d) - theta = np.atleast_1d(model.posterior(data).rvs()).reshape(k, n) - samples.append(np.concatenate([data, theta], axis=1)[:]) - - samples_1 = np.array(samples)[::100] - samples_2 = joint.rvs(len(samples_1)).reshape(len(samples_1), k, d + n) - - for j in range(k): - for i in range(n + d): - p = kstest(samples_1[:, j, i], samples_2[:, j, i]).pvalue - - if k == 1: - p = kstest(joint.logpdf(samples_2), joint.logpdf(samples_1)).pvalue - else: - p = kstest( - joint.logpdf(samples_2)[:, j], joint.logpdf(samples_1)[:, j] - ).pvalue - assert p > 1e-5 - - def test_from_joint(self, k, d, n): - model = self.random(k, d, n) - joint = model.joint() - means = joint.means - covs = joint.covs - model2 = self.cls.from_joint(means, covs, n) - assert model2.n == model.n - assert model2.d == model.d - assert_allclose(model2.M, model.M) - assert_allclose(model2.m, model.m) - assert_allclose(model2.C, model.C) - assert_allclose(model2.mu, model.mu) - assert_allclose(model2.Sigma, model.Sigma) - - def test_marginal_conditional(self, k, d, n): - model = self.random(k, d, n) - i = np.arange(d + n)[-n:] - model_1 = model.evidence() - model_2 = model.joint().marginalise(i) - assert_allclose(model_1.means, model_2.means) - assert_allclose(model_1.covs, model_2.covs, rtol=1e-6, atol=1e-6) - - theta = model.prior().rvs() - model_1 = model.likelihood(theta) - model_2 = model.joint().condition(i, theta) - assert_allclose(model_1.means, model_2.means) - assert_allclose(model_1.covs, model_2.covs, rtol=1e-6, atol=1e-6) - - i = np.arange(d + n)[:d] - model_1 = model.prior() - model_2 = model.joint().marginalise(i) - assert_allclose(model_1.means, model_2.means) - assert_allclose(model_1.covs, model_2.covs, rtol=1e-6, atol=1e-6) - - D = model.evidence().rvs() - model_1 = model.posterior(D) - model_2 = model.joint().condition(i, D) - assert_allclose(model_1.means, model_2.means) - assert_allclose(model_1.covs, model_2.covs, rtol=1e-6, atol=1e-6) - - def test_bayes_theorem(self, k, d, n): - model = self.random(k, d, n) - theta = model.prior().rvs() - D = model.evidence().rvs() - assert_allclose( - model.posterior(D).logpdf(theta) + model.evidence().logpdf(D), - model.likelihood(theta).logpdf(D) + model.prior().logpdf(theta), - ) diff --git a/tests/test_model_1.py b/tests/test_model_1.py deleted file mode 100644 index d3b937a..0000000 --- a/tests/test_model_1.py +++ /dev/null @@ -1,792 +0,0 @@ -import numpy as np -import pytest -from numpy.testing import assert_allclose - - -def assert_allclose_broadcast(a, b, *args, **kwargs): - shape = np.broadcast_shapes(np.shape(a), np.shape(b)) - return assert_allclose( - np.broadcast_to(a, shape), np.broadcast_to(b, shape), *args, **kwargs - ) - - -from lsbi.model_1 import ( - LinearModel, - MixtureModel, - ReducedLinearModel, - ReducedLinearModelUniformPrior, - _de_diagonalise, -) - -shapes = [(2, 3), (3,), ()] -dims = [1, 2, 4] - -tests = [] -for d in dims: - for n in dims: - for diagonal_Sigma in [True, False]: - for diagonal_C in [True, False]: - for diagonal_M in [True, False]: - for base_shape in shapes + ["scalar"]: - shape = base_shape - m_shape = base_shape - M_shape = base_shape - mu_shape = base_shape - C_shape = base_shape - Sigma_shape = base_shape - base_test = ( - d, - n, - shape, - m_shape, - M_shape, - mu_shape, - C_shape, - Sigma_shape, - diagonal_Sigma, - diagonal_C, - 
diagonal_M, - ) - for alt_shape in shapes + ["scalar"]: - for i in range(2, 8): - test = base_test[:i] + (alt_shape,) + base_test[i + 1 :] - if test[2] == "scalar": - continue - tests.append(test) - - -@pytest.mark.parametrize( - "d,n,shape,m_shape,M_shape,mu_shape,C_shape,Sigma_shape,diagonal_Sigma,diagonal_C,diagonal_M", - tests, -) -class TestLinearModel(object): - def random( - self, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - if M_shape == "scalar": - M = np.random.randn() - elif diagonal_M: - M = np.random.randn(*M_shape, n) - else: - M = np.random.randn(*M_shape, d, n) - - if m_shape == "scalar": - m = np.random.randn() - else: - m = np.random.randn(*m_shape, d) - - if C_shape == "scalar": - C = np.random.randn() ** 2 - elif diagonal_C: - C = np.random.randn(*C_shape, d) ** 2 - else: - C = np.random.randn(*C_shape, d, d) - C = np.einsum("...ij,...kj->...ik", C, C) + d * np.eye(d) - - if mu_shape == "scalar": - mu = np.random.randn() - else: - mu = np.random.randn(*mu_shape, n) - - if Sigma_shape == "scalar": - Sigma = np.random.randn() ** 2 - elif diagonal_Sigma: - Sigma = np.random.randn(*Sigma_shape, n) ** 2 - else: - Sigma = np.random.randn(*Sigma_shape, n, n) - Sigma = np.einsum("...ij,...kj->...ik", Sigma, Sigma) + n * np.eye(n) - - model = LinearModel( - M, m, C, mu, Sigma, shape, n, d, diagonal_M, diagonal_C, diagonal_Sigma - ) - assert model.d == d - assert model.n == n - assert np.all(model.M == M) - assert np.all(model.m == m) - assert np.all(model.C == C) - assert np.all(model.mu == mu) - assert np.all(model.Sigma == Sigma) - assert model.diagonal_M == diagonal_M or ( - M_shape == "scalar" and model.diagonal_M - ) - assert model.diagonal_C == diagonal_C or ( - C_shape == "scalar" and model.diagonal_C - ) - assert model.diagonal_Sigma == diagonal_Sigma or ( - Sigma_shape == "scalar" and model.diagonal_Sigma - ) - return model - - @pytest.mark.parametrize("theta_shape", shapes) - def test_likelihood( - self, - theta_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - theta = np.random.randn(*theta_shape, n) - dist = model.likelihood(theta) - assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) - assert dist.dim == model.d - - def test_prior( - self, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - dist = model.prior() - assert dist.shape == model.shape - assert dist.dim == model.n - - @pytest.mark.parametrize("D_shape", shapes) - def test_posterior( - self, - D_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - D = np.random.randn(*D_shape, d) - dist = model.posterior(D) - assert dist.shape == np.broadcast_shapes(model.shape, D_shape) - assert dist.dim == model.n - - def test_evidence( - self, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - 
Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - dist = model.evidence() - assert dist.shape == model.shape - assert dist.dim == model.d - - def test_joint( - self, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - dist = model.joint() - assert dist.shape == model.shape - assert dist.dim == model.n + model.d - - def test_marginal_conditional( - self, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - atol = 1e-5 - - i = np.arange(d + n)[-n:] - model_1 = model.evidence() - model_2 = model.joint().marginalise(i) - assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) - assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov, - atol=atol, - ) - - theta = model.prior().rvs() - model_1 = model.likelihood(theta) - model_2 = model.joint().condition(i, theta) - assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) - assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov, - atol=atol, - ) - - i = np.arange(d + n)[:d] - model_1 = model.prior() - model_2 = model.joint().marginalise(i) - assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) - assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov, - atol=atol, - ) - - D = model.evidence().rvs() - model_1 = model.posterior(D) - model_2 = model.joint().condition(i, D) - assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) - assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov, - atol=atol, - ) - - def test_bayes_theorem( - self, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - theta = model.prior().rvs() - D = model.evidence().rvs() - assert_allclose( - model.posterior(D).logpdf(theta, broadcast=True) - + model.evidence().logpdf(D, broadcast=True), - model.likelihood(theta).logpdf(D, broadcast=True) - + model.prior().logpdf(theta, broadcast=True), - ) - - -@pytest.mark.parametrize("logA_shape", shapes) -class TestMixtureModel(TestLinearModel): - def random( - self, - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = super().random( - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - logA = np.random.randn(*logA_shape) - model = MixtureModel( - logA, - model.M, - model.m, - model.C, - model.mu, - model.Sigma, - shape, - n, - d, - diagonal_M, - diagonal_C, - diagonal_Sigma, - ) - assert np.all(model.logA == logA) - return model - - @pytest.mark.parametrize("theta_shape", shapes) 
- def test_likelihood( - self, - theta_shape, - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - theta = np.random.randn(*theta_shape[:-1], n) - dist = model.likelihood(theta) - if model.shape != (): - assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) - assert dist.dim == model.d - - def test_prior( - self, - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - dist = model.prior() - assert dist.shape == model.shape - assert dist.dim == model.n - - @pytest.mark.parametrize("D_shape", shapes) - def test_posterior( - self, - D_shape, - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - D = np.random.randn(*D_shape[:-1], d) - dist = model.posterior(D) - if model.shape != (): - assert dist.shape == np.broadcast_shapes(model.shape, D_shape) - assert dist.dim == model.n - - def test_evidence( - self, - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - dist = model.evidence() - assert dist.shape == model.shape - assert dist.dim == model.d - - def test_joint( - self, - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - dist = model.joint() - assert dist.shape == model.shape - assert dist.dim == model.n + model.d - - def test_marginal_conditional( - self, - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - - atol = 1e-5 - - i = np.arange(d + n)[-n:] - model_1 = model.evidence() - model_2 = model.joint().marginalise(i) - assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) - assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov, - atol=atol, - ) - - theta = model.prior().rvs() - model_1 = model.likelihood(theta) - model_2 = model.joint().condition(i, theta) - assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) - assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov, - atol=atol, - ) - - i = np.arange(d + n)[:d] - model_1 = model.prior() - model_2 = model.joint().marginalise(i) - assert_allclose_broadcast(model_1.mean, 
model_2.mean, atol=atol) - assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov, - atol=atol, - ) - - D = model.evidence().rvs() - model_1 = model.posterior(D) - model_2 = model.joint().condition(i, D) - assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) - assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), - model_2.cov, - atol=atol, - ) - - def test_bayes_theorem( - self, - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ): - model = self.random( - logA_shape, - M_shape, - diagonal_M, - m_shape, - C_shape, - diagonal_C, - mu_shape, - Sigma_shape, - diagonal_Sigma, - shape, - n, - d, - ) - theta = model.prior().rvs() - D = model.evidence().rvs() - assert_allclose( - model.posterior(D).logpdf(theta, broadcast=True) - + model.evidence().logpdf(D, broadcast=True), - model.likelihood(theta).logpdf(D, broadcast=True) - + model.prior().logpdf(theta, broadcast=True), - ) - - -@pytest.mark.parametrize("n", np.arange(1, 6)) -class TestReducedLinearModel(object): - def random(self, n): - mu_pi = np.random.randn(n) - Sigma_pi = invwishart(scale=np.eye(n)).rvs() - mu_L = np.random.randn(n) - Sigma_L = invwishart(scale=np.eye(n)).rvs() - logLmax = np.random.randn() - - return ReducedLinearModel( - mu_pi=mu_pi, Sigma_pi=Sigma_pi, logLmax=logLmax, mu_L=mu_L, Sigma_L=Sigma_L - ) - - def test_bayes_theorem(self, n): - model = self.random(n) - theta = model.prior().rvs() - assert_allclose( - model.logP(theta) + model.logZ(), model.logL(theta) + model.logpi(theta) - ) - - -@pytest.mark.parametrize("n", np.arange(1, 6)) -class TestReducedLinearModelUniformPrior(object): - def random(self, n): - mu_L = np.random.randn(n) - Sigma_L = invwishart(scale=np.eye(n)).rvs() - logLmax = np.random.randn() - logV = np.random.randn() - - return ReducedLinearModelUniformPrior( - logLmax=logLmax, logV=logV, mu_L=mu_L, Sigma_L=Sigma_L - ) - - def test_model(self, n): - model = self.random(n) - theta = model.posterior().rvs(N) - assert_allclose( - model.logpi(theta) + model.logL(theta), model.logP(theta) + model.logZ() - ) - - logV = 50 - Sigma_pi = np.exp(2 * logV / n) / (2 * np.pi) * np.eye(n) - - reduced_model = ReducedLinearModel( - logLmax=model.logLmax, - mu_L=model.mu_L, - Sigma_L=model.Sigma_L, - Sigma_pi=Sigma_pi, - ) - - model = ReducedLinearModelUniformPrior( - logLmax=model.logLmax, mu_L=model.mu_L, Sigma_L=model.Sigma_L, logV=logV - ) - - assert_allclose(reduced_model.logZ(), model.logZ()) - assert_allclose(reduced_model.DKL(), model.DKL()) - - def test_bayes_theorem(self, n): - model = self.random(n) - theta = model.posterior().rvs() - assert_allclose( - model.logP(theta) + model.logZ(), model.logL(theta) + model.logpi(theta) - ) diff --git a/tests/test_stats.py b/tests/test_stats.py index 0bf83f6..c340d67 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -1,375 +1,499 @@ -# import numpy as np -# import pytest -# import scipy.special -# from numpy.testing import assert_allclose -# from scipy.stats import invwishart, kstest -# -# from lsbi.stats import ( -# mixture_multivariate_normal, -# multimultivariate_normal, -# multivariate_normal, -# ) -# -# N = 1000 -# -# -# @pytest.mark.parametrize("d", [1, 2, 5, 10]) -# @pytest.mark.parametrize("k", [1, 2, 5, 10]) -# class TestMixtureMultivariateNormal(object): -# cls = mixture_multivariate_normal -# -# def random(self, k, d): -# means = np.random.randn(k, d) 
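Every `test_bayes_theorem` above asserts the same log-space identity, log P(theta|D) + log Z(D) = log L(D|theta) + log pi(theta). A self-contained sketch of that identity for a one-dimensional conjugate linear-Gaussian model, using plain numpy/scipy with made-up parameter values rather than the lsbi API:

import numpy as np
from scipy.stats import norm

# Illustrative model: D | theta ~ N(theta, sigma^2), prior theta ~ N(mu, tau^2)
mu, tau, sigma = 0.3, 1.5, 0.7   # assumed values, for illustration only
theta, D = 0.1, 0.9

# Conjugacy: evidence and posterior are also Gaussian
evidence = norm(mu, np.sqrt(tau**2 + sigma**2))
post_var = 1 / (1 / tau**2 + 1 / sigma**2)
post_mean = post_var * (mu / tau**2 + D / sigma**2)
posterior = norm(post_mean, np.sqrt(post_var))

lhs = posterior.logpdf(theta) + evidence.logpdf(D)
rhs = norm(theta, sigma).logpdf(D) + norm(mu, tau).logpdf(theta)
# log P(theta|D) + log Z(D) == log L(D|theta) + log pi(theta)
assert np.isclose(lhs, rhs)

The assertions in the tests check the same equality, up to floating-point tolerance, for every broadcast shape of the model parameters.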
-# covs = invwishart(scale=np.eye(d), df=d * 10).rvs(k) -# if k == 1: -# covs = np.array([covs]) -# logA = np.log(scipy.stats.dirichlet(np.ones(k)).rvs())[0] + 10 -# return self.cls(means, covs, logA) -# -# def test_rvs(self, k, d): -# dist = self.random(k, d) -# logA = dist.logA -# logA -= scipy.special.logsumexp(logA) -# mvns = [ -# scipy.stats.multivariate_normal(dist.means[i], dist.covs[i]) -# for i in range(k) -# ] -# -# samples_1, logpdfs_1 = [], [] -# for _ in range(N): -# i = np.random.choice(k, p=np.exp(logA)) -# x = mvns[i].rvs() -# samples_1.append(x) -# logpdf = scipy.special.logsumexp( -# [mvns[j].logpdf(x) + logA[j] for j in range(k)] -# ) -# assert_allclose(logpdf, dist.logpdf(x)) -# logpdfs_1.append(logpdf) -# samples_1, logpdfs_1 = np.array(samples_1), np.array(logpdfs_1) -# -# samples_2 = dist.rvs(N) -# logpdfs_2 = dist.logpdf(samples_2) -# -# for i in range(d): -# if d == 1: -# p = kstest(samples_1, samples_2).pvalue -# else: -# p = kstest(samples_1[:, i], samples_2[:, i]).pvalue -# assert p > 1e-5 -# -# p = kstest(logpdfs_1, logpdfs_2).pvalue -# assert p > 1e-5 -# -# for shape in [(d,), (3, d), (3, 4, d)]: -# x = np.random.rand(*shape) -# assert mvns[0].logpdf(x).shape == dist.logpdf(x).shape -# -# def test_bijector(self, k, d): -# dist = self.random(k, d) -# -# # Test inversion -# x = np.random.rand(N, d) -# theta = dist.bijector(x) -# assert_allclose(dist.bijector(theta, inverse=True), x, atol=1e-6) -# -# # Test sampling -# samples = dist.rvs(N) -# for i in range(d): -# if d == 1: -# p = kstest(np.squeeze(theta), samples).pvalue -# else: -# p = kstest(theta[:, i], samples[:, i]).pvalue -# assert p > 1e-5 -# -# p = kstest(dist.logpdf(samples), dist.logpdf(theta)).pvalue -# assert p > 1e-5 -# -# # Test shapes -# x = np.random.rand(d) -# theta = dist.bijector(x) -# assert theta.shape == x.shape -# assert dist.bijector(theta, inverse=True).shape == x.shape -# -# x = np.random.rand(3, 4, d) -# theta = dist.bijector(x) -# assert theta.shape == x.shape -# assert dist.bijector(theta, inverse=True).shape == x.shape -# -# @pytest.mark.parametrize("p", np.arange(1, 5)) -# def test_marginalise_condition(self, k, d, p): -# if d <= p: -# pytest.skip("d <= p") -# i = np.random.choice(d, p, replace=False) -# j = np.array([x for x in range(d) if x not in i]) -# dist = self.random(k, d) -# mixture_2 = dist.marginalise(i) -# assert isinstance(mixture_2, self.cls) -# assert mixture_2.means.shape == (k, d - p) -# assert mixture_2.covs.shape == (k, d - p, d - p) -# assert_allclose(dist.means[:, j], mixture_2.means) -# assert_allclose(dist.covs[:, j][:, :, j], mixture_2.covs) -# -# v = np.random.randn(k, p) -# mixture_3 = dist.condition(i, v) -# assert isinstance(mixture_3, self.cls) -# assert mixture_3.means.shape == (k, d - p) -# assert mixture_3.covs.shape == (k, d - p, d - p) -# -# v = np.random.randn(p) -# mixture_3 = dist.condition(i, v) -# assert mixture_3.means.shape == (k, d - p) -# assert mixture_3.covs.shape == (k, d - p, d - p) -# -# @pytest.mark.parametrize("q", [1, 2, 5, 10]) -# def test_predict(self, q, k, d): -# dist = self.random(k, d) -# A = np.random.randn(k, q, d) -# y = dist.predict(A) -# assert isinstance(y, self.cls) -# assert y.means.shape == (k, q) -# assert y.covs.shape == (k, q, q) -# -# b = np.random.randn(q) -# y = dist.predict(A, b) -# assert isinstance(y, self.cls) -# assert y.means.shape == ( -# k, -# q, -# ) -# assert y.covs.shape == (k, q, q) -# -# -# @pytest.mark.parametrize("d", [1, 2, 5, 10]) -# class TestMultivariateNormal(object): -# cls = 
multivariate_normal -# -# def random(self, d): -# mean = np.random.randn(d) -# cov = invwishart(scale=np.eye(d), df=d * 10).rvs() -# return self.cls(mean, cov) -# -# def test_rvs(self, d): -# dist = self.random(d) -# mvn = scipy.stats.multivariate_normal(dist.mean, dist.cov) -# -# samples_1 = mvn.rvs(N) -# logpdfs_1 = mvn.logpdf(samples_1) -# assert_allclose(logpdfs_1, dist.logpdf(samples_1)) -# samples_2 = dist.rvs(N) -# logpdfs_2 = dist.logpdf(samples_2) -# -# for i in range(d): -# if d == 1: -# p = kstest(samples_1, samples_2).pvalue -# else: -# p = kstest(samples_1[:, i], samples_2[:, i]).pvalue -# assert p > 1e-5 -# -# p = kstest(logpdfs_1, logpdfs_2).pvalue -# assert p > 1e-5 -# -# for shape in [(), (d,), (3, d), (3, 4, d)]: -# x = np.random.rand(*shape) -# assert mvn.logpdf(x).shape == dist.logpdf(x).shape -# -# def test_bijector(self, d): -# dist = self.random(d) -# # Test inversion -# x = np.random.rand(N, d) -# theta = dist.bijector(x) -# assert_allclose(dist.bijector(theta, inverse=True), x, atol=1e-6) -# -# # Test sampling -# samples = dist.rvs(N) -# for i in range(d): -# if d == 1: -# p = kstest(np.squeeze(theta), samples).pvalue -# else: -# p = kstest(theta[:, i], samples[:, i]).pvalue -# assert p > 1e-5 -# -# p = kstest(dist.logpdf(samples), dist.logpdf(theta)).pvalue -# assert p > 1e-5 -# -# # Test shapes -# x = np.random.rand(d) -# theta = dist.bijector(x) -# assert theta.shape == x.shape -# assert dist.bijector(theta, inverse=True).shape == x.shape -# -# x = np.random.rand(3, 4, d) -# theta = dist.bijector(x) -# assert theta.shape == x.shape -# assert dist.bijector(theta, inverse=True).shape == x.shape -# -# @pytest.mark.parametrize("p", np.arange(1, 5)) -# def test_marginalise_condition_multivariate_normal(self, d, p): -# if d <= p: -# pytest.skip("d <= p") -# i = np.random.choice(d, p, replace=False) -# j = np.array([x for x in range(d) if x not in i]) -# dist_1 = self.random(d) -# dist_2 = dist_1.marginalise(i) -# assert isinstance(dist_2, self.cls) -# assert dist_2.mean.shape == (d - p,) -# assert dist_2.cov.shape == (d - p, d - p) -# assert_allclose(dist_1.mean[j], dist_2.mean) -# assert_allclose(dist_1.cov[j][:, j], dist_2.cov) -# -# v = np.random.randn(p) -# dist_3 = dist_1.condition(i, v) -# assert isinstance(dist_3, self.cls) -# assert dist_3.mean.shape == (d - p,) -# assert dist_3.cov.shape == (d - p, d - p) -# -# @pytest.mark.parametrize("q", [1, 2, 5, 10]) -# def test_predict(self, q, d): -# dist = self.random(d) -# A = np.random.randn(q, d) -# y = dist.predict(A) -# assert isinstance(y, self.cls) -# assert y.mean.shape == (q,) -# assert y.cov.shape == (q, q) -# -# b = np.random.randn(q) -# y = dist.predict(A, b) -# assert isinstance(y, self.cls) -# assert y.mean.shape == (q,) -# assert y.cov.shape == (q, q) -# -# -# @pytest.mark.parametrize("d", [1, 2, 5, 10]) -# @pytest.mark.parametrize("k", [1, 2, 5, 10]) -# class TestMultiMultivariateNormal(object): -# cls = multimultivariate_normal -# -# def random(self, k, d): -# means = np.random.randn(k, d) -# covs = invwishart(scale=np.eye(d), df=d * 10).rvs(k) -# if k == 1: -# covs = np.array([covs]) -# return self.cls(means, covs) -# -# def test_rvs(self, k, d): -# dist = self.random(k, d) -# mvns = [ -# scipy.stats.multivariate_normal(dist.means[i], dist.covs[i]) -# for i in range(k) -# ] -# -# samples_1, logpdfs_1 = [], [] -# for _ in range(N): -# xs = [mvn.rvs() for mvn in mvns] -# samples_1.append(xs) -# logpdf = [mvn.logpdf(x) for x, mvn in zip(xs, mvns)] -# assert_allclose(logpdf, dist.logpdf(xs)) -# 
logpdfs_1.append(logpdf) -# samples_1, logpdfs_1 = np.array(samples_1), np.array(logpdfs_1) -# -# samples_2 = dist.rvs(N) -# if d == 1: -# samples_2 = samples_2[..., None] -# logpdfs_2 = dist.logpdf(samples_2) -# -# for j in range(k): -# for i in range(d): -# if k == 1 and d == 1: -# p = kstest(samples_1[:, i], samples_2[:, i]).pvalue -# elif k == 1: -# p = kstest(samples_1[:, j, i], samples_2[:, i]).pvalue -# elif d == 1: -# p = kstest(samples_1[:, j], samples_2[:, j, i]).pvalue -# else: -# p = kstest(samples_1[:, j, i], samples_2[:, j, i]).pvalue -# assert p > 1e-5 -# -# if k == 1: -# p = kstest(logpdfs_1[j], logpdfs_2).pvalue -# else: -# p = kstest(logpdfs_1[j], logpdfs_2[j]).pvalue -# assert p > 1e-5 -# -# for shape in [(k, d), (3, k, d), (3, 4, k, d)]: -# xs = np.random.rand(*shape) -# logpdfs_1 = [mvn.logpdf(xs[..., i, :]) for i, mvn in enumerate(mvns)] -# logpdfs_2 = dist.logpdf(xs) -# if k == 1: -# logpdfs_2 = np.array(logpdfs_2)[..., None] -# for j in range(k): -# assert np.shape(logpdfs_1[j]) == logpdfs_2[..., j].shape -# -# def test_bijector(self, k, d): -# dist = self.random(k, d) -# -# # Test inversion -# xs = np.random.rand(N, k, d) -# theta = dist.bijector(xs) -# assert_allclose(dist.bijector(theta, inverse=True), xs, atol=1e-6) -# -# # Test sampling -# samples = dist.rvs(N) -# if d == 1: -# samples = samples[..., None] -# logpdf_1 = dist.logpdf(samples) -# logpdf_2 = dist.logpdf(theta) -# for j in range(k): -# for i in range(d): -# if k == 1: -# p = kstest(theta[:, j, i], samples[:, i]).pvalue -# else: -# p = kstest(theta[:, j, i], samples[:, j, i]).pvalue -# assert p > 1e-5 -# if k == 1: -# p = kstest(logpdf_1, logpdf_2).pvalue -# else: -# p = kstest(logpdf_1[j], logpdf_2[j]).pvalue -# assert p > 1e-5 -# -# # Test shapes -# xs = np.random.rand(k, d) -# theta = dist.bijector(xs) -# assert theta.shape == xs.shape -# assert dist.bijector(theta, inverse=True).shape == xs.shape -# -# xs = np.random.rand(3, 4, k, d) -# theta = dist.bijector(xs) -# assert theta.shape == xs.shape -# assert dist.bijector(theta, inverse=True).shape == xs.shape -# -# @pytest.mark.parametrize("p", np.arange(1, 5)) -# def test_marginalise_condition(self, k, d, p): -# if d <= p: -# pytest.skip("d <= p") -# i = np.random.choice(d, p, replace=False) -# j = np.array([x for x in range(d) if x not in i]) -# dist = self.random(k, d) -# mixture_2 = dist.marginalise(i) -# assert isinstance(mixture_2, self.cls) -# assert mixture_2.means.shape == (k, d - p) -# assert mixture_2.covs.shape == (k, d - p, d - p) -# assert_allclose(dist.means[:, j], mixture_2.means) -# assert_allclose(dist.covs[:, j][:, :, j], mixture_2.covs) -# -# v = np.random.randn(k, p) -# mixture_3 = dist.condition(i, v) -# assert isinstance(mixture_3, self.cls) -# assert mixture_3.means.shape == (k, d - p) -# assert mixture_3.covs.shape == (k, d - p, d - p) -# -# @pytest.mark.parametrize("q", [1, 2, 5, 10]) -# def test_predict(self, q, k, d): -# dist = self.random(k, d) -# A = np.random.randn(k, q, d) -# y = dist.predict(A) -# assert isinstance(y, self.cls) -# assert y.means.shape == (k, q) -# assert y.covs.shape == (k, q, q) -# -# b = np.random.randn(q) -# y = dist.predict(A, b) -# assert isinstance(y, self.cls) -# assert y.means.shape == ( -# k, -# q, -# ) -# assert y.covs.shape == (k, q, q) +import numpy as np +import pytest +import scipy +from numpy.testing import assert_allclose +from scipy.special import logsumexp +from scipy.stats import multivariate_normal as scipy_multivariate_normal + +from lsbi.stats_1 import mixture_normal, 
multivariate_normal + +shapes = [(2, 3), (3,), ()] +sizes = [(6, 5), (5,), ()] +dims = [1, 2, 4] +pvalue = 1e-7 + +tests = [] +A_tests = [] +p_tests = [] + +for dim in dims: + for shape in shapes: + for mean_shape in shapes + ["scalar"]: + for cov_shape in shapes + ["scalar"]: + for diagonal_cov in [True, False]: + tests.append((dim, shape, mean_shape, cov_shape, diagonal_cov)) + for A_shape in shapes + ["scalar"]: + for diagonal_A in [True, False]: + for b_shape in shapes + ["scalar"]: + for k in dims: + if (diagonal_A or A_shape == "scalar") and ( + b_shape != "scalar" or k != dim + ): + continue + A_tests.append( + ( + dim, + shape, + mean_shape, + cov_shape, + diagonal_cov, + A_shape, + diagonal_A, + b_shape, + k, + ) + ) + + for p in dims: + if dim < p: + continue + p_tests.append( + (dim, shape, mean_shape, cov_shape, diagonal_cov, p) + ) + + +def flatten(dist): + """Convert a multivariate_normal to a list of scipy.stats.multivariate_normal""" + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape(-1, dist.dim) + if dist.diagonal_cov: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape(-1, dist.dim) + else: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( + -1, dist.dim, dist.dim + ) + + flat_dist = [ + scipy_multivariate_normal(m, c, allow_singular=True) + for (m, c) in zip(mean, cov) + ] + return flat_dist + + +class TestMultivariateNormal(object): + cls = multivariate_normal + + def random(self, dim, shape, mean_shape, cov_shape, diagonal_cov): + if mean_shape == "scalar": + mean = np.random.randn() + else: + mean = np.random.randn(*mean_shape, dim) + + if cov_shape == "scalar": + cov = np.random.randn() ** 2 + elif diagonal_cov: + cov = np.random.randn(*cov_shape, dim) ** 2 + else: + cov = np.random.randn(*cov_shape, dim, dim) + cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) + + dist = multivariate_normal(mean, cov, shape, dim, diagonal_cov) + + assert dist.dim == dim + assert np.all(dist.mean == mean) + assert np.all(dist.cov == cov) + return dist + + @pytest.mark.parametrize("size", sizes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + x = np.random.randn(*size, dim) + logpdf = dist.logpdf(x) + assert logpdf.shape == size + dist.shape + + flat_dist = flatten(dist) + flat_logpdf = np.array([d.logpdf(x) for d in flat_dist]) + flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) + assert_allclose(logpdf, flat_logpdf) + + @pytest.mark.parametrize("size", sizes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_rvs_shape(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + rvs = dist.rvs(size) + assert rvs.shape == size + dist.shape + (dim,) + + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov): + size = 100 + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + rvs = dist.rvs(size) + + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( + -1, dist.dim + ) + if dist.diagonal_cov: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( + -1, dist.dim + ) + else: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, 
dist.dim)).reshape( + -1, dist.dim, dist.dim + ) + + rvs_ = np.array( + [ + scipy_multivariate_normal(ms, cs, allow_singular=True).rvs(size) + for ms, cs in zip(mean, cov) + ] + ).reshape(-1, size, dim) + + rvs = np.moveaxis(rvs.reshape(size, -1, dim), 1, 0) + + for a, b in zip(rvs, rvs_): + for i in range(dim): + assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > pvalue + + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", + A_tests, + ) + def test_predict( + self, + dim, + shape, + mean_shape, + cov_shape, + diagonal_cov, + k, + A_shape, + diagonal_A, + b_shape, + ): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + + if b_shape == "scalar": + b = np.random.randn() + else: + b = np.random.randn(*b_shape, k) + + if A_shape == "scalar": + A = np.random.randn() + elif diagonal_A: + A = np.random.randn(*A_shape, dim) + else: + A = np.random.randn(*A_shape, k, dim) + + dist_2 = dist.predict(A, b, diagonal_A) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes( + dist.shape, np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1] + ) + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + ) + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1] + ) + assert dist_2.dim == k + + dist_2 = dist.predict(A, diagonal_A=diagonal_A) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes( + dist.shape, np.shape(A)[: -2 + diagonal_A] + ) + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + ) + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A] + ) + assert dist_2.dim == k + + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests + ) + def test_marginalise(self, dim, shape, mean_shape, cov_shape, diagonal_cov, p): + indices = np.random.choice(dim, p, replace=False) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + dist_2 = dist.marginalise(indices) + + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape + assert ( + np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] + == np.shape(dist.cov)[: -2 + diagonal_cov] + ) + assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] + assert dist_2.dim == dim - p + + @pytest.mark.parametrize("values_shape", shapes) + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests + ) + def test_condition( + self, dim, shape, mean_shape, cov_shape, diagonal_cov, p, values_shape + ): + indices = np.random.choice(dim, p, replace=False) + values = np.random.randn(*values_shape, p) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + dist_2 = dist.condition(indices, values) + + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) + assert ( + np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] + == np.shape(dist.cov)[: -2 + diagonal_cov] + ) + if cov_shape == "scalar" or diagonal_cov: + assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] + else: + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], + np.shape(dist.cov)[: -2 + diagonal_cov], + values_shape, + ) + assert 
dist_2.dim == dim - p + + @pytest.mark.parametrize("x_shape", shapes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + x = np.random.rand(*x_shape, dim) + y = dist.bijector(x) + assert y.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) + + y = np.random.rand(*x_shape, dim) + x = dist.bijector(y, inverse=True) + + assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) + + x = np.random.rand(*x_shape, dim) + y = dist.bijector(x) + assert_allclose(np.broadcast_to(x, y.shape), dist.bijector(y, inverse=True)) + + +@pytest.mark.parametrize("logA_shape", shapes) +class TestMixtureNormal(TestMultivariateNormal): + cls = mixture_normal + + def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): + dist = super().random(dim, shape, mean_shape, cov_shape, diagonal_cov) + logA = np.random.randn(*logA_shape) + dist = mixture_normal( + logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + ) + assert np.all(dist.logA == logA) + return dist + + @pytest.mark.parametrize("size", sizes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_logpdf( + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size + ): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + x = np.random.randn(*size, dim) + logpdf = dist.logpdf(x) + assert logpdf.shape == size + dist.shape[:-1] + + logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( + -1, dist.k, dist.dim + ) + if dist.diagonal_cov: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( + -1, dist.k, dist.dim + ) + else: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( + -1, dist.k, dist.dim, dist.dim + ) + + flat_dist = [ + [ + scipy_multivariate_normal(m, c, allow_singular=True) + for (m, c) in zip(ms, cs) + ] + for (ms, cs) in zip(mean, cov) + ] + flat_logpdf = np.array( + [ + logsumexp([la + d.logpdf(x) for la, d in zip(las, ds)], axis=0) + for las, ds in zip(logA, flat_dist) + ] + ) + flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) + assert_allclose(logpdf, flat_logpdf) + + @pytest.mark.parametrize("size", sizes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_rvs_shape( + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size + ): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + rvs = dist.rvs(size) + assert rvs.shape == size + dist.shape[:-1] + (dim,) + + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): + size = 100 + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + rvs = dist.rvs(size) + logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() + logA -= logsumexp(logA, axis=-1, keepdims=True) + p = np.exp(logA) + mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( + -1, dist.k, dist.dim + ) + if dist.diagonal_cov: + cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( + -1, dist.k, dist.dim + ) + else: + cov = np.broadcast_to(dist.cov, 
dist.shape + (dist.dim, dist.dim)).reshape( + -1, dist.k, dist.dim, dist.dim + ) + + rvs_ = np.array( + [ + [ + scipy_multivariate_normal(ms[j], cs[j], allow_singular=True).rvs() + for j in np.random.choice(len(ms), p=ps, size=size) + ] + for ms, cs, ps in zip(mean, cov, p) + ] + ).reshape(-1, size, dim) + rvs = np.moveaxis(rvs, -2, 0).reshape(-1, size, dim) + + for a, b in zip(rvs, rvs_): + for i in range(dim): + assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > pvalue + + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", + A_tests, + ) + def test_predict( + self, + dim, + shape, + logA_shape, + mean_shape, + cov_shape, + diagonal_cov, + A_shape, + diagonal_A, + b_shape, + k, + ): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + + if b_shape == "scalar": + b = np.random.randn() + else: + b = np.random.randn(*b_shape, k) + + if A_shape == "scalar": + A = np.random.randn() + elif diagonal_A: + A = np.random.randn(*A_shape, dim) + else: + A = np.random.randn(*A_shape, k, dim) + + dist_2 = dist.predict(A, b, diagonal_A) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes( + dist.shape, + np.shape(A)[: -2 + diagonal_A], + np.shape(b)[:-1], + ) + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + ) + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], + np.shape(A)[: -2 + diagonal_A], + np.shape(b)[:-1], + ) + assert dist_2.dim == k + + dist_2 = dist.predict(A, diagonal_A=diagonal_A) + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes( + dist.shape, np.shape(A)[: -2 + diagonal_A] + ) + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + ) + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A] + ) + assert dist_2.dim == k + + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests + ) + def test_marginalise( + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, p + ): + indices = np.random.choice(dim, p, replace=False) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist_2 = dist.marginalise(indices) + + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape + assert ( + np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] + == np.shape(dist.cov)[: -2 + diagonal_cov] + ) + assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] + assert np.shape(dist_2.logA) == np.shape(dist.logA) + assert dist_2.dim == dim - p + + @pytest.mark.parametrize("values_shape", shapes) + @pytest.mark.parametrize( + "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests + ) + def test_condition( + self, + dim, + shape, + logA_shape, + mean_shape, + cov_shape, + diagonal_cov, + p, + values_shape, + ): + indices = np.random.choice(dim, p, replace=False) + values = np.random.randn(*values_shape[:-1], p) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist_2 = dist.condition(indices, values) + + assert isinstance(dist_2, self.cls) + assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) + assert ( + np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] + == np.shape(dist.cov)[: -2 + diagonal_cov] + 
) + if cov_shape == "scalar" or diagonal_cov: + assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] + else: + assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( + np.shape(dist.mean)[:-1], + np.shape(dist.cov)[: -2 + diagonal_cov], + values_shape[:-1] + (1,), + ) + assert np.shape(dist_2.logA) == dist_2.shape + assert dist_2.dim == dim - p + + @pytest.mark.parametrize("x_shape", shapes) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_bijector( + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, x_shape + ): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + x = np.random.rand(*x_shape[:-1], dim) + y = dist.bijector(x) + assert y.shape == np.broadcast_shapes(x.shape, dist.shape[:-1] + (dim,)) + + y = np.random.rand(*x_shape[:-1], dim) + x = dist.bijector(y, inverse=True) + assert x.shape == np.broadcast_shapes(y.shape, dist.shape[:-1] + (dim,)) + + x = np.random.rand(*x_shape[:-1], dim) + y = dist.bijector(x) + assert_allclose( + np.broadcast_to(x, y.shape), dist.bijector(y, inverse=True), atol=1e-4 + ) diff --git a/tests/test_stats_1.py b/tests/test_stats_1.py deleted file mode 100644 index c340d67..0000000 --- a/tests/test_stats_1.py +++ /dev/null @@ -1,499 +0,0 @@ -import numpy as np -import pytest -import scipy -from numpy.testing import assert_allclose -from scipy.special import logsumexp -from scipy.stats import multivariate_normal as scipy_multivariate_normal - -from lsbi.stats_1 import mixture_normal, multivariate_normal - -shapes = [(2, 3), (3,), ()] -sizes = [(6, 5), (5,), ()] -dims = [1, 2, 4] -pvalue = 1e-7 - -tests = [] -A_tests = [] -p_tests = [] - -for dim in dims: - for shape in shapes: - for mean_shape in shapes + ["scalar"]: - for cov_shape in shapes + ["scalar"]: - for diagonal_cov in [True, False]: - tests.append((dim, shape, mean_shape, cov_shape, diagonal_cov)) - for A_shape in shapes + ["scalar"]: - for diagonal_A in [True, False]: - for b_shape in shapes + ["scalar"]: - for k in dims: - if (diagonal_A or A_shape == "scalar") and ( - b_shape != "scalar" or k != dim - ): - continue - A_tests.append( - ( - dim, - shape, - mean_shape, - cov_shape, - diagonal_cov, - A_shape, - diagonal_A, - b_shape, - k, - ) - ) - - for p in dims: - if dim < p: - continue - p_tests.append( - (dim, shape, mean_shape, cov_shape, diagonal_cov, p) - ) - - -def flatten(dist): - """Convert a multivariate_normal to a list of scipy.stats.multivariate_normal""" - mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape(-1, dist.dim) - if dist.diagonal_cov: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape(-1, dist.dim) - else: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( - -1, dist.dim, dist.dim - ) - - flat_dist = [ - scipy_multivariate_normal(m, c, allow_singular=True) - for (m, c) in zip(mean, cov) - ] - return flat_dist - - -class TestMultivariateNormal(object): - cls = multivariate_normal - - def random(self, dim, shape, mean_shape, cov_shape, diagonal_cov): - if mean_shape == "scalar": - mean = np.random.randn() - else: - mean = np.random.randn(*mean_shape, dim) - - if cov_shape == "scalar": - cov = np.random.randn() ** 2 - elif diagonal_cov: - cov = np.random.randn(*cov_shape, dim) ** 2 - else: - cov = np.random.randn(*cov_shape, dim, dim) - cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - - dist = multivariate_normal(mean, cov, shape, dim, diagonal_cov) - - assert dist.dim == dim - assert 
np.all(dist.mean == mean) - assert np.all(dist.cov == cov) - return dist - - @pytest.mark.parametrize("size", sizes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - x = np.random.randn(*size, dim) - logpdf = dist.logpdf(x) - assert logpdf.shape == size + dist.shape - - flat_dist = flatten(dist) - flat_logpdf = np.array([d.logpdf(x) for d in flat_dist]) - flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) - assert_allclose(logpdf, flat_logpdf) - - @pytest.mark.parametrize("size", sizes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs_shape(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - rvs = dist.rvs(size) - assert rvs.shape == size + dist.shape + (dim,) - - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov): - size = 100 - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - rvs = dist.rvs(size) - - mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( - -1, dist.dim - ) - if dist.diagonal_cov: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( - -1, dist.dim - ) - else: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( - -1, dist.dim, dist.dim - ) - - rvs_ = np.array( - [ - scipy_multivariate_normal(ms, cs, allow_singular=True).rvs(size) - for ms, cs in zip(mean, cov) - ] - ).reshape(-1, size, dim) - - rvs = np.moveaxis(rvs.reshape(size, -1, dim), 1, 0) - - for a, b in zip(rvs, rvs_): - for i in range(dim): - assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > pvalue - - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", - A_tests, - ) - def test_predict( - self, - dim, - shape, - mean_shape, - cov_shape, - diagonal_cov, - k, - A_shape, - diagonal_A, - b_shape, - ): - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - - if b_shape == "scalar": - b = np.random.randn() - else: - b = np.random.randn(*b_shape, k) - - if A_shape == "scalar": - A = np.random.randn() - elif diagonal_A: - A = np.random.randn(*A_shape, dim) - else: - A = np.random.randn(*A_shape, k, dim) - - dist_2 = dist.predict(A, b, diagonal_A) - assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes( - dist.shape, np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1] - ) - assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] - ) - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1] - ) - assert dist_2.dim == k - - dist_2 = dist.predict(A, diagonal_A=diagonal_A) - assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes( - dist.shape, np.shape(A)[: -2 + diagonal_A] - ) - assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] - ) - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A] - ) - assert dist_2.dim == k - - @pytest.mark.parametrize( - "dim, 
shape, mean_shape, cov_shape, diagonal_cov, p", p_tests - ) - def test_marginalise(self, dim, shape, mean_shape, cov_shape, diagonal_cov, p): - indices = np.random.choice(dim, p, replace=False) - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - dist_2 = dist.marginalise(indices) - - assert isinstance(dist_2, self.cls) - assert dist_2.shape == dist.shape - assert ( - np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] - == np.shape(dist.cov)[: -2 + diagonal_cov] - ) - assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] - assert dist_2.dim == dim - p - - @pytest.mark.parametrize("values_shape", shapes) - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests - ) - def test_condition( - self, dim, shape, mean_shape, cov_shape, diagonal_cov, p, values_shape - ): - indices = np.random.choice(dim, p, replace=False) - values = np.random.randn(*values_shape, p) - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - dist_2 = dist.condition(indices, values) - - assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) - assert ( - np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] - == np.shape(dist.cov)[: -2 + diagonal_cov] - ) - if cov_shape == "scalar" or diagonal_cov: - assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] - else: - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], - np.shape(dist.cov)[: -2 + diagonal_cov], - values_shape, - ) - assert dist_2.dim == dim - p - - @pytest.mark.parametrize("x_shape", shapes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape): - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) - x = np.random.rand(*x_shape, dim) - y = dist.bijector(x) - assert y.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) - - y = np.random.rand(*x_shape, dim) - x = dist.bijector(y, inverse=True) - - assert x.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) - - x = np.random.rand(*x_shape, dim) - y = dist.bijector(x) - assert_allclose(np.broadcast_to(x, y.shape), dist.bijector(y, inverse=True)) - - -@pytest.mark.parametrize("logA_shape", shapes) -class TestMixtureNormal(TestMultivariateNormal): - cls = mixture_normal - - def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): - dist = super().random(dim, shape, mean_shape, cov_shape, diagonal_cov) - logA = np.random.randn(*logA_shape) - dist = mixture_normal( - logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov - ) - assert np.all(dist.logA == logA) - return dist - - @pytest.mark.parametrize("size", sizes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_logpdf( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size - ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - x = np.random.randn(*size, dim) - logpdf = dist.logpdf(x) - assert logpdf.shape == size + dist.shape[:-1] - - logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) - mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( - -1, dist.k, dist.dim - ) - if dist.diagonal_cov: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( - -1, dist.k, dist.dim - ) - else: - cov = np.broadcast_to(dist.cov, dist.shape + 
(dist.dim, dist.dim)).reshape( - -1, dist.k, dist.dim, dist.dim - ) - - flat_dist = [ - [ - scipy_multivariate_normal(m, c, allow_singular=True) - for (m, c) in zip(ms, cs) - ] - for (ms, cs) in zip(mean, cov) - ] - flat_logpdf = np.array( - [ - logsumexp([la + d.logpdf(x) for la, d in zip(las, ds)], axis=0) - for las, ds in zip(logA, flat_dist) - ] - ) - flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) - assert_allclose(logpdf, flat_logpdf) - - @pytest.mark.parametrize("size", sizes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs_shape( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size - ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - rvs = dist.rvs(size) - assert rvs.shape == size + dist.shape[:-1] + (dim,) - - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): - size = 100 - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - rvs = dist.rvs(size) - logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) - p = np.exp(logA) - mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( - -1, dist.k, dist.dim - ) - if dist.diagonal_cov: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( - -1, dist.k, dist.dim - ) - else: - cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( - -1, dist.k, dist.dim, dist.dim - ) - - rvs_ = np.array( - [ - [ - scipy_multivariate_normal(ms[j], cs[j], allow_singular=True).rvs() - for j in np.random.choice(len(ms), p=ps, size=size) - ] - for ms, cs, ps in zip(mean, cov, p) - ] - ).reshape(-1, size, dim) - rvs = np.moveaxis(rvs, -2, 0).reshape(-1, size, dim) - - for a, b in zip(rvs, rvs_): - for i in range(dim): - assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > pvalue - - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", - A_tests, - ) - def test_predict( - self, - dim, - shape, - logA_shape, - mean_shape, - cov_shape, - diagonal_cov, - A_shape, - diagonal_A, - b_shape, - k, - ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - - if b_shape == "scalar": - b = np.random.randn() - else: - b = np.random.randn(*b_shape, k) - - if A_shape == "scalar": - A = np.random.randn() - elif diagonal_A: - A = np.random.randn(*A_shape, dim) - else: - A = np.random.randn(*A_shape, k, dim) - - dist_2 = dist.predict(A, b, diagonal_A) - assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes( - dist.shape, - np.shape(A)[: -2 + diagonal_A], - np.shape(b)[:-1], - ) - assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] - ) - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], - np.shape(A)[: -2 + diagonal_A], - np.shape(b)[:-1], - ) - assert dist_2.dim == k - - dist_2 = dist.predict(A, diagonal_A=diagonal_A) - assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes( - dist.shape, np.shape(A)[: -2 + diagonal_A] - ) - assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] - ) - assert np.shape(dist_2.mean)[:-1] == 
np.broadcast_shapes( - np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A] - ) - assert dist_2.dim == k - - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests - ) - def test_marginalise( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, p - ): - indices = np.random.choice(dim, p, replace=False) - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - dist_2 = dist.marginalise(indices) - - assert isinstance(dist_2, self.cls) - assert dist_2.shape == dist.shape - assert ( - np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] - == np.shape(dist.cov)[: -2 + diagonal_cov] - ) - assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] - assert np.shape(dist_2.logA) == np.shape(dist.logA) - assert dist_2.dim == dim - p - - @pytest.mark.parametrize("values_shape", shapes) - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests - ) - def test_condition( - self, - dim, - shape, - logA_shape, - mean_shape, - cov_shape, - diagonal_cov, - p, - values_shape, - ): - indices = np.random.choice(dim, p, replace=False) - values = np.random.randn(*values_shape[:-1], p) - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - dist_2 = dist.condition(indices, values) - - assert isinstance(dist_2, self.cls) - assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) - assert ( - np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] - == np.shape(dist.cov)[: -2 + diagonal_cov] - ) - if cov_shape == "scalar" or diagonal_cov: - assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] - else: - assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( - np.shape(dist.mean)[:-1], - np.shape(dist.cov)[: -2 + diagonal_cov], - values_shape[:-1] + (1,), - ) - assert np.shape(dist_2.logA) == dist_2.shape - assert dist_2.dim == dim - p - - @pytest.mark.parametrize("x_shape", shapes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_bijector( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, x_shape - ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) - x = np.random.rand(*x_shape[:-1], dim) - y = dist.bijector(x) - assert y.shape == np.broadcast_shapes(x.shape, dist.shape[:-1] + (dim,)) - - y = np.random.rand(*x_shape[:-1], dim) - x = dist.bijector(y, inverse=True) - assert x.shape == np.broadcast_shapes(y.shape, dist.shape[:-1] + (dim,)) - - x = np.random.rand(*x_shape[:-1], dim) - y = dist.bijector(x) - assert_allclose( - np.broadcast_to(x, y.shape), dist.bijector(y, inverse=True), atol=1e-4 - ) From b80bba14a7a5a9f11f92911856cf6fffe85c763d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 18:19:30 +0000 Subject: [PATCH 051/117] Corrected tests --- lsbi/model.py | 2 +- tests/test_model.py | 2 +- tests/test_stats.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index c39b624..2f33b2b 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -2,7 +2,7 @@ import numpy as np from numpy.linalg import inv, solve -from lsbi.stats_1 import mixture_normal, multivariate_normal +from lsbi.stats import mixture_normal, multivariate_normal from lsbi.utils import logdet, matrix diff --git a/tests/test_model.py b/tests/test_model.py index d3b937a..3a408b5 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -10,7 +10,7 @@ def assert_allclose_broadcast(a, b, *args, **kwargs): ) -from lsbi.model_1 
import ( +from lsbi.model import ( LinearModel, MixtureModel, ReducedLinearModel, diff --git a/tests/test_stats.py b/tests/test_stats.py index c340d67..5fc7bf2 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -5,7 +5,7 @@ from scipy.special import logsumexp from scipy.stats import multivariate_normal as scipy_multivariate_normal -from lsbi.stats_1 import mixture_normal, multivariate_normal +from lsbi.stats import mixture_normal, multivariate_normal shapes = [(2, 3), (3,), ()] sizes = [(6, 5), (5,), ()] From 1a575802f2d42079378595cd39cd2b1f4f3888dd Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 18:37:21 +0000 Subject: [PATCH 052/117] Corrected imports for Reduced classes --- lsbi/stats.py | 1 + tests/test_model.py | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 71ec517..0d09b7b 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -520,5 +520,6 @@ def f(t): def __getitem__(self, arg): # noqa: D105 dist = super().__getitem__(arg) + dist.__class__ = mixture_normal dist.logA = np.broadcast_to(self.logA, self.shape)[arg] return dist diff --git a/tests/test_model.py b/tests/test_model.py index 3a408b5..2868433 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,14 +1,7 @@ import numpy as np import pytest from numpy.testing import assert_allclose - - -def assert_allclose_broadcast(a, b, *args, **kwargs): - shape = np.broadcast_shapes(np.shape(a), np.shape(b)) - return assert_allclose( - np.broadcast_to(a, shape), np.broadcast_to(b, shape), *args, **kwargs - ) - +from scipy.stats import invwishart from lsbi.model import ( LinearModel, @@ -18,8 +11,17 @@ def assert_allclose_broadcast(a, b, *args, **kwargs): _de_diagonalise, ) + +def assert_allclose_broadcast(a, b, *args, **kwargs): + shape = np.broadcast_shapes(np.shape(a), np.shape(b)) + return assert_allclose( + np.broadcast_to(a, shape), np.broadcast_to(b, shape), *args, **kwargs + ) + + shapes = [(2, 3), (3,), ()] dims = [1, 2, 4] +N = 1000 tests = [] for d in dims: From 3a6cd8735d5daf3ec834a576af88b3340904d69e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 18:43:16 +0000 Subject: [PATCH 053/117] Updated for black 24.1.1 --- lsbi/__init__.py | 1 + lsbi/model.py | 1 + lsbi/network.py | 1 + lsbi/stats.py | 1 + lsbi/utils.py | 1 + 5 files changed, 5 insertions(+) diff --git a/lsbi/__init__.py b/lsbi/__init__.py index 819dd94..013722b 100644 --- a/lsbi/__init__.py +++ b/lsbi/__init__.py @@ -1,2 +1,3 @@ """lsbi: Linear Simulation Based Inference.""" + from lsbi._version import __version__ # noqa: F401 diff --git a/lsbi/model.py b/lsbi/model.py index 2f33b2b..25a4aef 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -1,4 +1,5 @@ """Gaussian models for linear Bayesian inference.""" + import numpy as np from numpy.linalg import inv, solve diff --git a/lsbi/network.py b/lsbi/network.py index e87498e..ad3c0ed 100644 --- a/lsbi/network.py +++ b/lsbi/network.py @@ -1,4 +1,5 @@ """Simple binary classifiers to perform model comparison.""" + import torch import torch.nn as nn import torch.optim as optim diff --git a/lsbi/stats.py b/lsbi/stats.py index 0d09b7b..085710c 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -1,4 +1,5 @@ """Extensions to scipy.stats functions.""" + from copy import deepcopy import numpy as np diff --git a/lsbi/utils.py b/lsbi/utils.py index 082d1ca..b7cda6a 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -1,4 +1,5 @@ """Utility functions for lsbi.""" + import numpy as np From 
036142fab276470e5b0c772d45418099a96f2179 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 19:30:33 +0000 Subject: [PATCH 054/117] Reordered theta D for joint --- lsbi/model.py | 365 ++++++++++++++++++++++++++++++++++++++++++-- tests/test_model.py | 17 ++- 2 files changed, 362 insertions(+), 20 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 25a4aef..2f54235 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -225,23 +225,23 @@ def evidence(self): return multivariate_normal(mu, Sigma, self.shape, self.d, diagonal_Sigma) def joint(self): - """P(D, theta) as a scipy distribution object. + """P(theta, D) as a scipy distribution object. - [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) - [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) + [theta] ~ N( [ mu ] [ Sigma Sigma M' ] ) + [ D ] ( [m + M mu] , [M Sigma C + M Sigma M'] ) """ evidence = self.evidence() prior = self.prior() a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) b = np.broadcast_to(prior.mean, self.shape + (self.n,)) mu = np.block([a, b]) - A = _de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) - A = np.broadcast_to(A, self.shape + (self.d, self.d)) - D = _de_diagonalise(prior.cov, prior.diagonal_cov, self.n) - D = np.broadcast_to(D, self.shape + (self.n, self.n)) - B = np.einsum("...ja,...al->...jl", self._M, self._Sigma) - B = np.broadcast_to(B, self.shape + (self.d, self.n)) - C = np.moveaxis(B, -1, -2) + A = _de_diagonalise(prior.cov, prior.diagonal_cov, self.n) + A = np.broadcast_to(D, self.shape + (self.n, self.n)) + D = _de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) + D = np.broadcast_to(A, self.shape + (self.d, self.d)) + C = np.einsum("...ja,...al->...jl", self._M, self._Sigma) + C = np.broadcast_to(B, self.shape + (self.d, self.n)) + B = np.moveaxis(C, -1, -2) Sigma = np.block([[A, B], [C, D]]) return multivariate_normal(mu, Sigma, self.shape, self.n + self.d) @@ -382,8 +382,8 @@ def evidence(self): def joint(self): """P(D, theta) as a scipy distribution object. - [ D ] | A ~ N( [m + M mu] [C + M Sigma M' M Sigma] ) - [theta] | ( [ mu ] , [ Sigma M' Sigma ] ) + [theta] | A ~ N( [ mu ] [ Sigma Sigma M' ] ) + [ D ] | ( [m + M mu] , [M Sigma C + M Sigma M'] ) A ~ categorical(exp(logA)) """ @@ -543,3 +543,344 @@ def logZ(self): def DKL(self): """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" return self.logV - logdet(2 * np.pi * np.e * self.Sigma_P) / 2 + + +class ReducedLinearModel(object): + """A model with no data. 
+ + If a Likelihood is Gaussian in the parameters, it is sometimes more + clear/efficient to phrase it in terms of a parameter covariance, parameter + mean and peak value: + + logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) + + We can link this to a data-based model with the relations: + + Sigma_L = (M' C^{-1} M)^{-1} + mu_L = Sigma_L M' C^{-1} (D-m) + logLmax = + - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 + + Parameters + ---------- + mu_L : array_like + Likelihood peak + Sigma_L : array_like + Likelihood covariance + logLmax : float, optional + Likelihood maximum, defaults to zero + mu_pi : array_like, optional + Prior mean, defaults to zero vector + Sigma_pi : array_like, optional + Prior covariance, defaults to identity matrix + """ + + def __init__( + self, + mu_L=0, + Sigma_L=1, + logLmax=0, + mu_pi=0, + Sigma_pi=1, + shape=(), + n=1, + diagonal_Sigma_L=False, + diagonal_Sigma_pi=False, + ): + self.mu_L = mu_L + self.Sigma_L = Sigma_L + self.logLmax = logLmax + self.mu_pi = mu_pi + self.Sigma_pi = Sigma_pi + self._shape = shape + self._n = n + self.diagonal_Sigma_L = diagonal_Sigma_L + self.diagonal_Sigma_pi = diagonal_Sigma_pi + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + np.shape(self.mu_L)[:-1], + np.shape(self.Sigma_L)[: -2 + self.diagonal_Sigma_L], + np.shape(self.mu_pi)[:-1], + np.shape(self.Sigma_pi)[: -2 + self.diagonal_Sigma_pi], + self._shape, + ) + + @property + def n(self): + """Dimension of the distribution.""" + return np.max( + [ + *np.shape(self.Sigma_L)[-2 + self.diagonal_Sigma_L :], + *np.shape(self.mu_L)[-1:], + *np.shape(self.Sigma_pi)[-2 + self.diagonal_Sigma_pi :], + *np.shape(self.mu_pi)[-1:], + self._n, + ] + ) + + def prior(self): + """P(theta) as a scipy distribution object.""" + return multivariate_normal( + self.mu_pi, self.Sigma_pi, self.shape, self.n, self.diagonal_Sigma_pi + ) + + def posterior(self): + """P(theta|D) as a scipy distribution object.""" + if self.diagonal_Sigma_L and self.diagonal_Sigma_pi: + Sigma_P = 1 / (1 / self.Sigma_pi + 1 / self.Sigma_L) + else: + Sigma_P = inv( + inv(_de_diagonalise(self.Sigma_pi, self.diagonal_Sigma_pi, self.n)) + + inv(_de_diagonalise(self.Sigma_L, self.diagonal_Sigma_L, self.n)) + ) + + if self.diagonal_Sigma_L: + x_L = mu_L / self.Sigma_L + else: + x_L = solve(self.Sigma_L, self.mu_L) + + if self.diagonal_Sigma_pi: + x_pi = mu_pi / self.Sigma_pi + else: + x_pi = solve(self.Sigma_pi, self.mu_pi) + + mu_P = Sigma_P @ (x_pi + x_L) + return multivariate_normal( + mu_P, + Sigma_P, + self.shape, + self.n, + self.diagonal_Sigma_L and self.diagonal_Sigma_pi, + ) + + def logpi(self, theta): + """P(theta) as a scalar.""" + return self.prior().logpdf(theta) + + def logP(self, theta): + """P(theta|D) as a scalar.""" + return self.posterior().logpdf(theta) + + def logL(self, theta): + """P(D|theta) as a scalar.""" + dist = multivariate_normal( + self.mu_L, self.Sigma_L, self.shape, self.n, self.diagonal_Sigma_L + ) + return self.logLmax + dist.logpdf(theta) - dist.logpdf(dist.mu) + + def logZ(self): + """P(D) as a scalar.""" + posterior = self.posterior() + mu_P = posterior.mean + Sigma_P = posterior.cov + logZ = ( + self.logLmax + + logdet(self.Sigma_P, posterior.diagonal_cov) / 2 + - logdet(self.Sigma_pi, self.diagonal_Sigma_pi) / 2 + ) + if self.diagonal_Sigma_L: + logZ -= ( + (self.mu_L - self.mu_pi) + @ (self.mu_L - self.mu_pi) + / (2 * self.Sigma_pi) + ) + else: + logZ -= ( + (self.mu_L - self.mu_pi) + @ 
inv(self.Sigma_pi) + @ (self.mu_L - self.mu_pi) + / 2 + ) + if self.diagonal_Sigma_pi: + logZ -= ( + (self.mu_P - self.mu_pi) + @ (self.mu_P - self.mu_pi) + / (2 * self.Sigma_pi) + ) + else: + logZ -= ( + (self.mu_P - self.mu_pi) + @ inv(self.Sigma_pi) + @ (self.mu_P - self.mu_pi) + / 2 + ) + return logZ + + def DKL(self): + """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" + posterior = self.posterior() + mu_P = posterior.mean + + if self.diagonal_Sigma_pi: + DKL = (self.mu_P - self.mu_pi) @ ( + self.mu_P - self.mu_pi + ) / self.Sigma_pi + np.trace(self.Sigma_P / self.Sigma_pi - 1) + inv_Sigma_pi = np.eye(self.n) / np.atleast_1d(self.Sigma_pi)[..., None, :] + else: + DKL = ( + (self.mu_P - self.mu_pi) @ inv(self.Sigma_pi) @ (self.mu_P - self.mu_pi) + ) + inv_Sigma_pi = inv(self.Sigma_pi) + + DKL += np.trace(inv_Sigma_pi @ self.Sigma_P - 1) + DKL += logdet(self.Sigma_pi, self.diagonal_Sigma_pi) + DKL -= logdet(posterior.cov, posterior.diagonal_cov) + return DKL / 2 + + +class ReducedLinearModelUniformPrior(object): + """A model with no data. + + Gaussian likelihood in the parameters + + logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) + + Uniform prior + + We can link this to a data-based model with the relations: + + Sigma = (M' C^{-1} M)^{-1} + mu = Sigma M' C^{-1} (D-m) + logLmax = + -log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 + + Parameters + ---------- + mu : array_like + Likelihood peak + Sigma : array_like + Likelihood covariance + logLmax : float, optional + Likelihood maximum, defaults to zero + logV : float, optional + log prior volume, defaults to zero + n : int, optional + Number of parameters, defaults to automatically inferred value + """ + + def __init__( + self, mu=0, Sigma=1, logLmax=0, logV=0, shape=(), n=1, diagonal_Sigma=False + ): + self.mu = mu + self.Sigma = Sigma + self.logLmax = logLmax + self.logV = logV + self._shape = shape + self._n = n + self.diagonal_Sigma = diagonal_Sigma + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + np.shape(self.mu)[:-1], + np.shape(self.Sigma)[: -2 + self.diagonal_Sigma], + self._shape, + ) + + @property + def n(self): + """Dimension of the distribution.""" + return np.max( + [ + *np.shape(self.Sigma)[-2 + self.diagonal_Sigma :], + *np.shape(self.mu)[-1:], + self._n, + ] + ) + + def posterior(self): + """P(theta|D) as a scipy distribution object.""" + return multivariate_normal( + self.mu, self.Sigma, self.shape, self.n, self.diagonal_Sigma + ) + + def logpi(self, theta): + """P(theta) as a scalar.""" + return numpy.zeros(*self.shape, *np.shape(theta)) - self.logV + + def logP(self, theta): + """P(theta|D) as a scalar.""" + return self.posterior().logpdf(theta) + + def logL(self, theta): + """P(D|theta) as a scalar.""" + dist = self.posterior() + return self.logLmax + dist.logpdf(theta) - dist.logpdf(dist.mu) + + def logZ(self): + """P(D) as a scalar.""" + return ( + self.logLmax + + logdet(2 * np.pi * self.Sigma, self.diagonal_Sigma) / 2 + - self.logV + ) + + def DKL(self): + """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" + return ( + self.logV - logdet(2 * np.pi * np.e * self.Sigma, self.diagonal_Sigma) / 2 + ) + + +class ReducedMixtureModel(ReducedLinearModel): + """A model with no data. 
+ + Gaussian likelihood in the parameters + + logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) + + We can link this to a data-based model with the relations: + + Sigma_L = (M' C^{-1} M)^{-1} + mu_L = Sigma_L M' C^{-1} (D-m) + logLmax = + - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 + + Parameters + ---------- + mu_L : array_like + Likelihood peak + Sigma_L : array_like + Likelihood covariance + logLmax : float, optional + Likelihood maximum, defaults to zero + logA : array_like, optional + log mixture weights + if ndim>=1: log mixture weights + if scalar: scalar * unit vector + Defaults to uniform weights + n : int, optional + Number of parameters, defaults to automatically inferred value + """ + + def __init__(self, logA=1, *args): + super().__init__(*args) + self.logA = logA + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes(np.shape(self.logA), super().shape) + + @property + def k(self): + """Number of mixture components of the distribution.""" + return self.shape[-1] + + def prior(self): + """P(theta) as a scipy distribution object.""" + dist = super().prior() + dist.__class__ = mixture + dist.logA = self.logA + return dist + + def posterior(self): + """P(theta|D) as a scipy distribution object.""" + dist = super().posterior() + dist.__class__ = mixture + dist.logA = self.evidence().logpdf(D, broadcast=True, joint=True) + return dist diff --git a/tests/test_model.py b/tests/test_model.py index 2868433..ad23725 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -320,7 +320,7 @@ def test_marginal_conditional( ) atol = 1e-5 - i = np.arange(d + n)[-n:] + i = np.arange(d + n)[:n] model_1 = model.evidence() model_2 = model.joint().marginalise(i) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) @@ -340,7 +340,7 @@ def test_marginal_conditional( atol=atol, ) - i = np.arange(d + n)[:d] + i = np.arange(d + n)[-d:] model_1 = model.prior() model_2 = model.joint().marginalise(i) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) @@ -387,8 +387,9 @@ def test_bayes_theorem( n, d, ) - theta = model.prior().rvs() - D = model.evidence().rvs() + + theta_D = model.joint().rvs() + theta, D = np.split(theta_D, [model.n], axis=-1) assert_allclose( model.posterior(D).logpdf(theta, broadcast=True) + model.evidence().logpdf(D, broadcast=True), @@ -650,7 +651,7 @@ def test_marginal_conditional( atol = 1e-5 - i = np.arange(d + n)[-n:] + i = np.arange(n + d)[:n] model_1 = model.evidence() model_2 = model.joint().marginalise(i) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) @@ -670,7 +671,7 @@ def test_marginal_conditional( atol=atol, ) - i = np.arange(d + n)[:d] + i = np.arange(n + d)[-d:] model_1 = model.prior() model_2 = model.joint().marginalise(i) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) @@ -719,8 +720,8 @@ def test_bayes_theorem( n, d, ) - theta = model.prior().rvs() - D = model.evidence().rvs() + theta_D = model.joint().rvs() + theta, D = np.split(theta_D, [model.n], axis=-1) assert_allclose( model.posterior(D).logpdf(theta, broadcast=True) + model.evidence().logpdf(D, broadcast=True), From 4bc07c56e2d520c621063c2ecc220a13796b689d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 19:32:59 +0000 Subject: [PATCH 055/117] Removed partial code --- lsbi/model.py | 341 -------------------------------------------------- 1 file changed, 341 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 2f54235..6f8ff64 100644 
--- a/lsbi/model.py +++ b/lsbi/model.py @@ -543,344 +543,3 @@ def logZ(self): def DKL(self): """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" return self.logV - logdet(2 * np.pi * np.e * self.Sigma_P) / 2 - - -class ReducedLinearModel(object): - """A model with no data. - - If a Likelihood is Gaussian in the parameters, it is sometimes more - clear/efficient to phrase it in terms of a parameter covariance, parameter - mean and peak value: - - logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) - - We can link this to a data-based model with the relations: - - Sigma_L = (M' C^{-1} M)^{-1} - mu_L = Sigma_L M' C^{-1} (D-m) - logLmax = - - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 - - Parameters - ---------- - mu_L : array_like - Likelihood peak - Sigma_L : array_like - Likelihood covariance - logLmax : float, optional - Likelihood maximum, defaults to zero - mu_pi : array_like, optional - Prior mean, defaults to zero vector - Sigma_pi : array_like, optional - Prior covariance, defaults to identity matrix - """ - - def __init__( - self, - mu_L=0, - Sigma_L=1, - logLmax=0, - mu_pi=0, - Sigma_pi=1, - shape=(), - n=1, - diagonal_Sigma_L=False, - diagonal_Sigma_pi=False, - ): - self.mu_L = mu_L - self.Sigma_L = Sigma_L - self.logLmax = logLmax - self.mu_pi = mu_pi - self.Sigma_pi = Sigma_pi - self._shape = shape - self._n = n - self.diagonal_Sigma_L = diagonal_Sigma_L - self.diagonal_Sigma_pi = diagonal_Sigma_pi - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes( - np.shape(self.mu_L)[:-1], - np.shape(self.Sigma_L)[: -2 + self.diagonal_Sigma_L], - np.shape(self.mu_pi)[:-1], - np.shape(self.Sigma_pi)[: -2 + self.diagonal_Sigma_pi], - self._shape, - ) - - @property - def n(self): - """Dimension of the distribution.""" - return np.max( - [ - *np.shape(self.Sigma_L)[-2 + self.diagonal_Sigma_L :], - *np.shape(self.mu_L)[-1:], - *np.shape(self.Sigma_pi)[-2 + self.diagonal_Sigma_pi :], - *np.shape(self.mu_pi)[-1:], - self._n, - ] - ) - - def prior(self): - """P(theta) as a scipy distribution object.""" - return multivariate_normal( - self.mu_pi, self.Sigma_pi, self.shape, self.n, self.diagonal_Sigma_pi - ) - - def posterior(self): - """P(theta|D) as a scipy distribution object.""" - if self.diagonal_Sigma_L and self.diagonal_Sigma_pi: - Sigma_P = 1 / (1 / self.Sigma_pi + 1 / self.Sigma_L) - else: - Sigma_P = inv( - inv(_de_diagonalise(self.Sigma_pi, self.diagonal_Sigma_pi, self.n)) - + inv(_de_diagonalise(self.Sigma_L, self.diagonal_Sigma_L, self.n)) - ) - - if self.diagonal_Sigma_L: - x_L = mu_L / self.Sigma_L - else: - x_L = solve(self.Sigma_L, self.mu_L) - - if self.diagonal_Sigma_pi: - x_pi = mu_pi / self.Sigma_pi - else: - x_pi = solve(self.Sigma_pi, self.mu_pi) - - mu_P = Sigma_P @ (x_pi + x_L) - return multivariate_normal( - mu_P, - Sigma_P, - self.shape, - self.n, - self.diagonal_Sigma_L and self.diagonal_Sigma_pi, - ) - - def logpi(self, theta): - """P(theta) as a scalar.""" - return self.prior().logpdf(theta) - - def logP(self, theta): - """P(theta|D) as a scalar.""" - return self.posterior().logpdf(theta) - - def logL(self, theta): - """P(D|theta) as a scalar.""" - dist = multivariate_normal( - self.mu_L, self.Sigma_L, self.shape, self.n, self.diagonal_Sigma_L - ) - return self.logLmax + dist.logpdf(theta) - dist.logpdf(dist.mu) - - def logZ(self): - """P(D) as a scalar.""" - posterior = self.posterior() - mu_P = posterior.mean - Sigma_P = posterior.cov - logZ = ( - self.logLmax - + 
logdet(self.Sigma_P, posterior.diagonal_cov) / 2 - - logdet(self.Sigma_pi, self.diagonal_Sigma_pi) / 2 - ) - if self.diagonal_Sigma_L: - logZ -= ( - (self.mu_L - self.mu_pi) - @ (self.mu_L - self.mu_pi) - / (2 * self.Sigma_pi) - ) - else: - logZ -= ( - (self.mu_L - self.mu_pi) - @ inv(self.Sigma_pi) - @ (self.mu_L - self.mu_pi) - / 2 - ) - if self.diagonal_Sigma_pi: - logZ -= ( - (self.mu_P - self.mu_pi) - @ (self.mu_P - self.mu_pi) - / (2 * self.Sigma_pi) - ) - else: - logZ -= ( - (self.mu_P - self.mu_pi) - @ inv(self.Sigma_pi) - @ (self.mu_P - self.mu_pi) - / 2 - ) - return logZ - - def DKL(self): - """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" - posterior = self.posterior() - mu_P = posterior.mean - - if self.diagonal_Sigma_pi: - DKL = (self.mu_P - self.mu_pi) @ ( - self.mu_P - self.mu_pi - ) / self.Sigma_pi + np.trace(self.Sigma_P / self.Sigma_pi - 1) - inv_Sigma_pi = np.eye(self.n) / np.atleast_1d(self.Sigma_pi)[..., None, :] - else: - DKL = ( - (self.mu_P - self.mu_pi) @ inv(self.Sigma_pi) @ (self.mu_P - self.mu_pi) - ) - inv_Sigma_pi = inv(self.Sigma_pi) - - DKL += np.trace(inv_Sigma_pi @ self.Sigma_P - 1) - DKL += logdet(self.Sigma_pi, self.diagonal_Sigma_pi) - DKL -= logdet(posterior.cov, posterior.diagonal_cov) - return DKL / 2 - - -class ReducedLinearModelUniformPrior(object): - """A model with no data. - - Gaussian likelihood in the parameters - - logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) - - Uniform prior - - We can link this to a data-based model with the relations: - - Sigma = (M' C^{-1} M)^{-1} - mu = Sigma M' C^{-1} (D-m) - logLmax = - -log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 - - Parameters - ---------- - mu : array_like - Likelihood peak - Sigma : array_like - Likelihood covariance - logLmax : float, optional - Likelihood maximum, defaults to zero - logV : float, optional - log prior volume, defaults to zero - n : int, optional - Number of parameters, defaults to automatically inferred value - """ - - def __init__( - self, mu=0, Sigma=1, logLmax=0, logV=0, shape=(), n=1, diagonal_Sigma=False - ): - self.mu = mu - self.Sigma = Sigma - self.logLmax = logLmax - self.logV = logV - self._shape = shape - self._n = n - self.diagonal_Sigma = diagonal_Sigma - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes( - np.shape(self.mu)[:-1], - np.shape(self.Sigma)[: -2 + self.diagonal_Sigma], - self._shape, - ) - - @property - def n(self): - """Dimension of the distribution.""" - return np.max( - [ - *np.shape(self.Sigma)[-2 + self.diagonal_Sigma :], - *np.shape(self.mu)[-1:], - self._n, - ] - ) - - def posterior(self): - """P(theta|D) as a scipy distribution object.""" - return multivariate_normal( - self.mu, self.Sigma, self.shape, self.n, self.diagonal_Sigma - ) - - def logpi(self, theta): - """P(theta) as a scalar.""" - return numpy.zeros(*self.shape, *np.shape(theta)) - self.logV - - def logP(self, theta): - """P(theta|D) as a scalar.""" - return self.posterior().logpdf(theta) - - def logL(self, theta): - """P(D|theta) as a scalar.""" - dist = self.posterior() - return self.logLmax + dist.logpdf(theta) - dist.logpdf(dist.mu) - - def logZ(self): - """P(D) as a scalar.""" - return ( - self.logLmax - + logdet(2 * np.pi * self.Sigma, self.diagonal_Sigma) / 2 - - self.logV - ) - - def DKL(self): - """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" - return ( - self.logV - logdet(2 * np.pi * np.e * self.Sigma, self.diagonal_Sigma) / 2 - ) - - -class 
ReducedMixtureModel(ReducedLinearModel): - """A model with no data. - - Gaussian likelihood in the parameters - - logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) - - We can link this to a data-based model with the relations: - - Sigma_L = (M' C^{-1} M)^{-1} - mu_L = Sigma_L M' C^{-1} (D-m) - logLmax = - - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 - - Parameters - ---------- - mu_L : array_like - Likelihood peak - Sigma_L : array_like - Likelihood covariance - logLmax : float, optional - Likelihood maximum, defaults to zero - logA : array_like, optional - log mixture weights - if ndim>=1: log mixture weights - if scalar: scalar * unit vector - Defaults to uniform weights - n : int, optional - Number of parameters, defaults to automatically inferred value - """ - - def __init__(self, logA=1, *args): - super().__init__(*args) - self.logA = logA - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes(np.shape(self.logA), super().shape) - - @property - def k(self): - """Number of mixture components of the distribution.""" - return self.shape[-1] - - def prior(self): - """P(theta) as a scipy distribution object.""" - dist = super().prior() - dist.__class__ = mixture - dist.logA = self.logA - return dist - - def posterior(self): - """P(theta|D) as a scipy distribution object.""" - dist = super().posterior() - dist.__class__ = mixture - dist.logA = self.evidence().logpdf(D, broadcast=True, joint=True) - return dist From f54a8d1bbe59d1898a352e0c26b45603988a72aa Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 20:22:28 +0000 Subject: [PATCH 056/117] Corrected transposition error --- lsbi/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 6f8ff64..3b67cf0 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -236,9 +236,9 @@ def joint(self): b = np.broadcast_to(prior.mean, self.shape + (self.n,)) mu = np.block([a, b]) A = _de_diagonalise(prior.cov, prior.diagonal_cov, self.n) - A = np.broadcast_to(D, self.shape + (self.n, self.n)) + A = np.broadcast_to(A, self.shape + (self.n, self.n)) D = _de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) - D = np.broadcast_to(A, self.shape + (self.d, self.d)) + D = np.broadcast_to(D, self.shape + (self.d, self.d)) C = np.einsum("...ja,...al->...jl", self._M, self._Sigma) C = np.broadcast_to(B, self.shape + (self.d, self.n)) B = np.moveaxis(C, -1, -2) From 26633ee8fc70a3319f871c83fd6cea03aa217e8c Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 6 Feb 2024 20:38:29 +0000 Subject: [PATCH 057/117] Corrected sphinx docstrings --- lsbi/stats.py | 48 ++++++++++++++++++++++++------------------------ lsbi/utils.py | 2 +- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 085710c..8a795b0 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -23,10 +23,10 @@ class multivariate_normal(object): Parameters ---------- - mean : array_like, shape (..., dim) + mean : array_like, shape `(..., dim)` Mean of each component. - cov array_like, shape (..., dim, dim) + cov array_like, shape `(..., dim, dim)` Covariance matrix of each component. shape: tuple, optional, default=() @@ -75,7 +75,7 @@ def logpdf(self, x, broadcast=False): Parameters ---------- - x : array_like, shape (*size, dim) + x : array_like, shape `(*size, dim)` Points at which to evaluate the log of the probability density function. 
broadcast : bool, optional, default=False @@ -83,7 +83,7 @@ def logpdf(self, x, broadcast=False): Returns ------- - logpdf : array_like, shape (*size, *shape) + logpdf : array_like, shape `(*size, *shape)` Log of the probability density function evaluated at x. """ x = np.array(x) @@ -106,12 +106,12 @@ def pdf(self, x): Parameters ---------- - x : array_like, shape (*size, dim) + x : array_like, shape `(*size, dim)` Points at which to evaluate the probability density function. Returns ------- - pdf : array_like, shape (*size, *shape) + pdf : array_like, shape `(*size, *shape)` Probability density function evaluated at x. """ return np.exp(self.logpdf(x)) @@ -126,7 +126,7 @@ def rvs(self, size=()): Returns ------- - rvs : ndarray, shape (*size, *shape, dim) + rvs : ndarray, shape `(*size, *shape, dim)` Random samples from the distribution. """ size = np.atleast_1d(size) @@ -144,16 +144,16 @@ def predict(self, A=1, b=0, diagonal_A=False): Parameters ---------- - A : array_like, shape (..., k, dim) + A : array_like, shape `(..., k, dim)` Linear transformation matrix. - b : array_like, shape (..., k), optional + b : array_like, shape `(..., k)`, optional Linear transformation vector. where self.shape is broadcastable to ... Returns ------- - transformed distribution shape (..., k) + transformed distribution shape `(..., k)` """ if len(np.shape(A)) < 2: diagonal_A = True @@ -192,7 +192,7 @@ def marginalise(self, indices): Returns ------- - marginalised distribution, shape (*shape, dim - len(indices)) + marginalised distribution, shape `(*shape, dim - len(indices))` """ dist = deepcopy(self) i = self._bar(indices) @@ -213,14 +213,14 @@ def condition(self, indices, values): ---------- indices : array_like Indices to condition over. - values : array_like shape (..., len(indices)) + values : array_like shape `(..., len(indices))` Values to condition on. where where self.shape is broadcastable to ... Returns ------- - conditioned distribution shape (..., len(indices)) + conditioned distribution shape `(..., len(indices))` """ i = self._bar(indices) k = indices @@ -263,7 +263,7 @@ def bijector(self, x, inverse=False): Parameters ---------- - x : array_like, shape (..., dim) + x : array_like, shape `(..., dim)` if inverse: x is theta else: x is x inverse : bool, optional, default=False @@ -337,13 +337,13 @@ class mixture_normal(multivariate_normal): Parameters ---------- - mean : array_like, shape (..., n, dim) + mean : array_like, shape `(..., n, dim)` Mean of each component. - cov: array_like, shape (..., n, dim, dim) + cov: array_like, shape `(..., n, dim, dim)` Covariance matrix of each component. - logA: array_like, shape (..., n) + logA: array_like, shape `(..., n)` Log of the mixing weights. shape: tuple, optional, default=() @@ -379,7 +379,7 @@ def logpdf(self, x, broadcast=False, joint=False): Parameters ---------- - x : array_like, shape (*size, dim) + x : array_like, shape `(*size, dim)` Points at which to evaluate the log of the probability density function. @@ -388,7 +388,7 @@ def logpdf(self, x, broadcast=False, joint=False): Returns ------- - logpdf : array_like, shape (*size, *shape[:-1]) + logpdf : array_like, shape `(*size, *shape[:-1])` Log of the probability density function evaluated at x. 
""" if broadcast: @@ -411,7 +411,7 @@ def rvs(self, size=()): Returns ------- - rvs : array_like, shape (*size, *shape[:-1], dim) + rvs : array_like, shape `(*size, *shape[:-1], dim)` """ if self.shape == (): return super().rvs(size=size) @@ -443,14 +443,14 @@ def condition(self, indices, values): ---------- indices : array_like Indices to condition over. - values : array_like shape (..., len(indices)) + values : array_like shape `(..., len(indices))` Values to condition on. where self.shape[:-1] is broadcastable to ... Returns ------- - conditioned distribution, shape (*shape, len(indices)) + conditioned distribution, shape `(*shape, len(indices))` """ dist = super().condition(indices, np.expand_dims(values, -2)) dist.__class__ = mixture_normal @@ -469,7 +469,7 @@ def bijector(self, x, inverse=False): Parameters ---------- - x : array_like, shape (..., d) + x : array_like, shape `(..., d)` if inverse: x is theta else: x is x inverse : bool, optional, default=False @@ -480,7 +480,7 @@ def bijector(self, x, inverse=False): Returns ------- - transformed x or theta: array_like, shape (..., d) + transformed x or theta: array_like, shape `(..., d)` """ x = np.array(x) theta = np.empty(np.broadcast_shapes(x.shape, self.shape[:-1] + (self.dim,))) diff --git a/lsbi/utils.py b/lsbi/utils.py index b7cda6a..fc8d4ae 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -82,7 +82,7 @@ def choice(size, p): Returns ------- out : ndarray - Output array of shape (*size, *p.shape[:-1]). + Output array of shape `(*size, *p.shape[:-1])`. """ cump = np.cumsum(p, axis=-1) u = np.random.rand(*size, *p.shape) From 1d56c8ce36fbc7c0a45f2136d685c133d8f0e9b3 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 7 Feb 2024 09:02:53 +0000 Subject: [PATCH 058/117] Made covariance calculations more reliable --- lsbi/model.py | 6 +++--- tests/test_model.py | 14 ++++++++++---- tests/test_stats.py | 4 ++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 3b67cf0..9fd8510 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -232,15 +232,15 @@ def joint(self): """ evidence = self.evidence() prior = self.prior() - a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) b = np.broadcast_to(prior.mean, self.shape + (self.n,)) - mu = np.block([a, b]) + a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) + mu = np.block([b, a]) A = _de_diagonalise(prior.cov, prior.diagonal_cov, self.n) A = np.broadcast_to(A, self.shape + (self.n, self.n)) D = _de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) D = np.broadcast_to(D, self.shape + (self.d, self.d)) C = np.einsum("...ja,...al->...jl", self._M, self._Sigma) - C = np.broadcast_to(B, self.shape + (self.d, self.n)) + C = np.broadcast_to(C, self.shape + (self.d, self.n)) B = np.moveaxis(C, -1, -2) Sigma = np.block([[A, B], [C, D]]) return multivariate_normal(mu, Sigma, self.shape, self.n + self.d) diff --git a/tests/test_model.py b/tests/test_model.py index ad23725..769aec5 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -89,9 +89,9 @@ def random( m = np.random.randn(*m_shape, d) if C_shape == "scalar": - C = np.random.randn() ** 2 + C = np.random.randn() ** 2 + d elif diagonal_C: - C = np.random.randn(*C_shape, d) ** 2 + C = np.random.randn(*C_shape, d) ** 2 + d else: C = np.random.randn(*C_shape, d, d) C = np.einsum("...ij,...kj->...ik", C, C) + d * np.eye(d) @@ -102,9 +102,9 @@ def random( mu = np.random.randn(*mu_shape, n) if Sigma_shape == "scalar": - Sigma = np.random.randn() ** 2 + Sigma = 
np.random.randn() ** 2 + n elif diagonal_Sigma: - Sigma = np.random.randn(*Sigma_shape, n) ** 2 + Sigma = np.random.randn(*Sigma_shape, n) ** 2 + n else: Sigma = np.random.randn(*Sigma_shape, n, n) Sigma = np.einsum("...ij,...kj->...ik", Sigma, Sigma) + n * np.eye(n) @@ -374,6 +374,8 @@ def test_bayes_theorem( n, d, ): + atol = 1e-5 + model = self.random( M_shape, diagonal_M, @@ -395,6 +397,7 @@ def test_bayes_theorem( + model.evidence().logpdf(D, broadcast=True), model.likelihood(theta).logpdf(D, broadcast=True) + model.prior().logpdf(theta, broadcast=True), + atol=atol, ) @@ -706,6 +709,8 @@ def test_bayes_theorem( n, d, ): + atol = 1e-5 + model = self.random( logA_shape, M_shape, @@ -727,6 +732,7 @@ def test_bayes_theorem( + model.evidence().logpdf(D, broadcast=True), model.likelihood(theta).logpdf(D, broadcast=True) + model.prior().logpdf(theta, broadcast=True), + atol=atol, ) diff --git a/tests/test_stats.py b/tests/test_stats.py index 5fc7bf2..7025b35 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -79,9 +79,9 @@ def random(self, dim, shape, mean_shape, cov_shape, diagonal_cov): mean = np.random.randn(*mean_shape, dim) if cov_shape == "scalar": - cov = np.random.randn() ** 2 + cov = np.random.randn() ** 2 + dim elif diagonal_cov: - cov = np.random.randn(*cov_shape, dim) ** 2 + cov = np.random.randn(*cov_shape, dim) ** 2 + dim else: cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) From 575845619293233ff0b1a47e025c9dce87e8967e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 7 Feb 2024 12:32:22 +0000 Subject: [PATCH 059/117] Removed now unused function --- lsbi/utils.py | 35 ++--------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/lsbi/utils.py b/lsbi/utils.py index fc8d4ae..2036145 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -3,12 +3,9 @@ import numpy as np -def logdet(A, diag=False): +def logdet(A): """log(abs(det(A))).""" - if diag: - return np.sum(np.log(np.abs(A)), axis=-1) - else: - return np.linalg.slogdet(A)[1] + return np.linalg.slogdet(A)[1] def quantise(f, x, tol=1e-8): @@ -17,14 +14,6 @@ def quantise(f, x, tol=1e-8): return np.where(np.abs(y) < tol, 0, y) -def matrix(M, *args): - """Convert M to a matrix.""" - if len(np.shape(M)) > 1: - return M - else: - return M * np.eye(*args) - - def bisect(f, a, b, args=(), tol=1e-8): """Vectorised simple bisection search. @@ -67,23 +56,3 @@ def bisect(f, a, b, args=(), tol=1e-8): b = np.where(fq == 0, q, b) b = np.where(fb * fq > 0, q, b) return (a + b) / 2 - - -def choice(size, p): - """Vectorised choice function. - - Parameters - ---------- - size : int or tuple of ints - Shape of the output. - p : array_like - Probability array - - Returns - ------- - out : ndarray - Output array of shape `(*size, *p.shape[:-1])`. 
- """ - cump = np.cumsum(p, axis=-1) - u = np.random.rand(*size, *p.shape) - return np.argmin(u > cump, axis=-1) From 00a338a4b4272a10b9fb06fe2cb055afcb9a41c1 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 7 Feb 2024 13:44:22 +0000 Subject: [PATCH 060/117] Aiming for 100% coverage --- lsbi/model.py | 2 +- lsbi/stats.py | 2 +- tests/test_stats.py | 57 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 9fd8510..cfd11e0 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -4,7 +4,7 @@ from numpy.linalg import inv, solve from lsbi.stats import mixture_normal, multivariate_normal -from lsbi.utils import logdet, matrix +from lsbi.utils import logdet def _de_diagonalise(x, diagonal, *args): diff --git a/lsbi/stats.py b/lsbi/stats.py index 8a795b0..9effbb3 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -7,7 +7,7 @@ from numpy.linalg import cholesky, inv from scipy.special import erf, logsumexp -from lsbi.utils import bisect, choice, logdet +from lsbi.utils import bisect, logdet class multivariate_normal(object): diff --git a/tests/test_stats.py b/tests/test_stats.py index 7025b35..bf97206 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -93,6 +93,32 @@ def random(self, dim, shape, mean_shape, cov_shape, diagonal_cov): assert np.all(dist.cov == cov) return dist + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_getitem(self, dim, shape, mean_shape, cov_shape, diagonal_cov): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + + if len(dist.shape) > 0: + dist_2 = dist[0] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape[1:] + assert dist_2.dim == dim + + if len(dist.shape) > 1: + dist_2 = dist[0, 0] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape[2:] + assert dist_2.dim == dim + + dist_2 = dist[0, :] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape[1:] + assert dist_2.dim == dim + + dist_2 = dist[:, 0] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape[:-1] + assert dist_2.dim == dim + @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): @@ -106,6 +132,8 @@ def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) assert_allclose(logpdf, flat_logpdf) + assert_allclose(np.log(np.exp(logpdf), dist.pdf(x))) + @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_rvs_shape(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): @@ -274,8 +302,35 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov ) assert np.all(dist.logA == logA) + assert dist.k == dist.shape[-1] return dist + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + def test_getitem(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + + if len(dist.shape) > 0: + dist_2 = dist[0] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape[1:] + assert dist_2.dim == dim + + if len(dist.shape) > 1: + dist_2 = dist[0, 0] 
+ assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape[2:] + assert dist_2.dim == dim + + dist_2 = dist[0, :] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape[1:] + assert dist_2.dim == dim + + dist_2 = dist[:, 0] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == dist.shape[:-1] + assert dist_2.dim == dim + @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) def test_logpdf( @@ -286,6 +341,8 @@ def test_logpdf( logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape[:-1] + assert_allclose(np.log(np.exp(logpdf), dist.pdf(x))) + logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() logA -= logsumexp(logA, axis=-1, keepdims=True) mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( From c73d43ec58d787aa91195c0c8c255a26279d5068 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 9 Feb 2024 09:56:39 +0000 Subject: [PATCH 061/117] Corrected syntax error in assert --- tests/test_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_stats.py b/tests/test_stats.py index bf97206..5a08393 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -341,7 +341,7 @@ def test_logpdf( logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape[:-1] - assert_allclose(np.log(np.exp(logpdf), dist.pdf(x))) + assert_allclose(np.log(np.exp(logpdf)), dist.pdf(x)) logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() logA -= logsumexp(logA, axis=-1, keepdims=True) From 8e8b6de362c8173abcddcccb0868d245797e666d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 9 Feb 2024 10:11:24 +0000 Subject: [PATCH 062/117] Actually corrected typos --- tests/test_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_stats.py b/tests/test_stats.py index 5a08393..2d86b8a 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -132,7 +132,7 @@ def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) assert_allclose(logpdf, flat_logpdf) - assert_allclose(np.log(np.exp(logpdf), dist.pdf(x))) + assert_allclose(np.log(np.exp(logpdf)), dist.pdf(x)) @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) From a0bda840c4d026d1f1df8f55580a4a68ebe156aa Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 9 Feb 2024 11:00:03 +0000 Subject: [PATCH 063/117] Updated --- lsbi/model.py | 19 ++--- lsbi/stats.py | 72 ++++++++++-------- lsbi/utils.py | 8 ++ tests/test_model.py | 18 ++--- tests/test_stats.py | 178 +++++++++++++++++++++----------------------- 5 files changed, 147 insertions(+), 148 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index cfd11e0..fbf8fff 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -4,14 +4,7 @@ from numpy.linalg import inv, solve from lsbi.stats import mixture_normal, multivariate_normal -from lsbi.utils import logdet - - -def _de_diagonalise(x, diagonal, *args): - if diagonal: - return np.atleast_1d(x)[..., None, :] * np.eye(*args) - else: - return x +from lsbi.utils import dediagonalise, logdet class LinearModel(object): @@ -235,9 +228,9 @@ def joint(self): b = np.broadcast_to(prior.mean, self.shape + (self.n,)) a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) mu = np.block([b, a]) - A = _de_diagonalise(prior.cov, prior.diagonal_cov, self.n) + A = 
dediagonalise(prior.cov, prior.diagonal, self.n) A = np.broadcast_to(A, self.shape + (self.n, self.n)) - D = _de_diagonalise(evidence.cov, evidence.diagonal_cov, self.d) + D = dediagonalise(evidence.cov, evidence.diagonal, self.d) D = np.broadcast_to(D, self.shape + (self.d, self.d)) C = np.einsum("...ja,...al->...jl", self._M, self._Sigma) C = np.broadcast_to(C, self.shape + (self.d, self.n)) @@ -247,15 +240,15 @@ def joint(self): @property def _M(self): - return _de_diagonalise(self.M, self.diagonal_M, self.d, self.n) + return dediagonalise(self.M, self.diagonal_M, self.d, self.n) @property def _C(self): - return _de_diagonalise(self.C, self.diagonal_C, self.d) + return dediagonalise(self.C, self.diagonal_C, self.d) @property def _Sigma(self): - return _de_diagonalise(self.Sigma, self.diagonal_Sigma, self.n) + return dediagonalise(self.Sigma, self.diagonal_Sigma, self.n) class MixtureModel(LinearModel): diff --git a/lsbi/stats.py b/lsbi/stats.py index 9effbb3..e61c52f 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -26,7 +26,7 @@ class multivariate_normal(object): mean : array_like, shape `(..., dim)` Mean of each component. - cov array_like, shape `(..., dim, dim)` + cov array_like, shape `(..., dim, dim)`if diagonal is False else shape `(..., dim)` Covariance matrix of each component. shape: tuple, optional, default=() @@ -35,27 +35,27 @@ class multivariate_normal(object): dim: int, optional, default=0 Dimension of the distribution. Useful for forcing a broadcast beyond that - inferred by mean and cov shapes + inferred by mean and cov dimensions - diagonal_cov: bool, optional, default=False + diagonal: bool, optional, default=False If True, cov is interpreted as the diagonal of the covariance matrix. """ - def __init__(self, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): + def __init__(self, mean=0, cov=1, shape=(), dim=0, diagonal=False): self.mean = mean self.cov = cov self._shape = shape self._dim = dim - self.diagonal_cov = diagonal_cov + self.diagonal = diagonal if len(np.shape(self.cov)) < 2: - self.diagonal_cov = True + self.diagonal = True @property def shape(self): """Shape of the distribution.""" return np.broadcast_shapes( np.shape(self.mean)[:-1], - np.shape(self.cov)[: -2 + self.diagonal_cov], + np.shape(self.cov)[: -2 + self.diagonal], self._shape, ) @@ -65,7 +65,7 @@ def dim(self): return np.max( [ *np.shape(self.mean)[-1:], - *np.shape(self.cov)[-2 + self.diagonal_cov :], + *np.shape(self.cov)[-2 + self.diagonal :], self._dim, ] ) @@ -79,11 +79,11 @@ def logpdf(self, x, broadcast=False): Points at which to evaluate the log of the probability density function. broadcast : bool, optional, default=False - If True, broadcast x across the distribution parameters. + If True, broadcast x across the shape of the distribution Returns ------- - logpdf : array_like, shape `(*size, *shape)` + logpdf : array_like, shape `(*size, *shape)` if broadcast is False else the consistent broadcast of size and shape. Log of the probability density function evaluated at x. 
""" x = np.array(x) @@ -93,7 +93,7 @@ def logpdf(self, x, broadcast=False): size = x.shape[:-1] mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) dx = x.reshape(*size, *np.ones_like(self.shape), self.dim) - mean - if self.diagonal_cov: + if self.diagonal: chi2 = (dx**2 / self.cov).sum(axis=-1) norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum(axis=-1) / 2 else: @@ -101,20 +101,22 @@ def logpdf(self, x, broadcast=False): norm = -logdet(2 * np.pi * self.cov) / 2 return norm - chi2 / 2 - def pdf(self, x): + def pdf(self, x, broadcast=False): """Probability density function. Parameters ---------- x : array_like, shape `(*size, dim)` Points at which to evaluate the probability density function. + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. Returns ------- - pdf : array_like, shape `(*size, *shape)` + pdf : array_like, shape `(*size, *shape)` if broadcast is False else the consistent broadcast of size and shape. Probability density function evaluated at x. """ - return np.exp(self.logpdf(x)) + return np.exp(self.logpdf(x, broadcast=broadcast)) def rvs(self, size=()): """Draw random samples from the distribution. @@ -131,12 +133,12 @@ def rvs(self, size=()): """ size = np.atleast_1d(size) x = np.random.randn(*size, *self.shape, self.dim) - if self.diagonal_cov: + if self.diagonal: return self.mean + np.sqrt(self.cov) * x else: return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) - def predict(self, A=1, b=0, diagonal_A=False): + def predict(self, A=1, b=0, diagonal=False): """Predict the mean and covariance of a linear transformation. if: x ~ N(mu, Sigma) @@ -148,6 +150,8 @@ def predict(self, A=1, b=0, diagonal_A=False): Linear transformation matrix. b : array_like, shape `(..., k)`, optional Linear transformation vector. + diagonal : bool, optional, default=False + If True, A is interpreted as the diagonal of the transformation matrix. where self.shape is broadcastable to ... 
@@ -156,11 +160,11 @@ def predict(self, A=1, b=0, diagonal_A=False): transformed distribution shape `(..., k)` """ if len(np.shape(A)) < 2: - diagonal_A = True + diagonal = True dist = deepcopy(self) - if diagonal_A: + if diagonal: dist.mean = A * self.mean + b - if self.diagonal_cov: + if self.diagonal: dist.cov = A * self.cov * A else: dist.cov = ( @@ -172,11 +176,11 @@ def predict(self, A=1, b=0, diagonal_A=False): dist.mean = ( np.einsum("...qn,...n->...q", A, np.ones(self.dim) * self.mean) + b ) - if self.diagonal_cov: + if self.diagonal: dist.cov = np.einsum( "...qn,...pn->...qp", A, A * np.atleast_1d(self.cov)[..., None, :] ) - dist.diagonal_cov = False + dist.diagonal = False else: dist.cov = np.einsum("...qn,...nm,...pm->...qp", A, self.cov, A) dist._dim = np.shape(A)[-2] @@ -198,7 +202,7 @@ def marginalise(self, indices): i = self._bar(indices) dist.mean = (np.ones(self.dim) * self.mean)[..., i] - if self.diagonal_cov: + if self.diagonal: dist.cov = (np.ones(self.dim) * self.cov)[..., i] else: dist.cov = self.cov[..., i, :][..., i] @@ -227,7 +231,7 @@ def condition(self, indices, values): dist = deepcopy(self) dist.mean = (np.ones(self.dim) * self.mean)[..., i] - if self.diagonal_cov: + if self.diagonal: dist.cov = (np.ones(self.dim) * self.cov)[..., i] dist._shape = np.broadcast_shapes(self.shape, values.shape[:-1]) else: @@ -279,14 +283,14 @@ def bijector(self, x, inverse=False): x = np.array(x) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) if inverse: - if self.diagonal_cov: + if self.diagonal: y = (x - mean) / np.sqrt(self.cov) else: y = np.einsum("...jk,...k->...j", inv(cholesky(self.cov)), x - mean) return scipy.stats.norm.cdf(y) else: y = scipy.stats.norm.ppf(x) - if self.diagonal_cov: + if self.diagonal: return mean + np.sqrt(self.cov) * y else: L = cholesky(self.cov) @@ -321,7 +325,7 @@ def __getitem__(self, arg): """ dist = deepcopy(self) dist.mean = np.broadcast_to(self.mean, (*self.shape, self.dim))[arg] - if self.diagonal_cov: + if self.diagonal: dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim))[arg] else: dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim, self.dim))[arg] @@ -354,13 +358,13 @@ class mixture_normal(multivariate_normal): Dimension of the distribution. Useful for forcing a broadcast beyond that inferred by mean and cov shapes - diagonal_cov: bool, optional, default=False + diagonal: bool, optional, default=False If True, cov is interpreted as the diagonal of the covariance matrix. """ - def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0, diagonal_cov=False): + def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0, diagonal=False): self.logA = logA - super().__init__(mean, cov, shape, dim, diagonal_cov) + super().__init__(mean, cov, shape, dim, diagonal) @property def shape(self): @@ -388,8 +392,12 @@ def logpdf(self, x, broadcast=False, joint=False): Returns ------- - logpdf : array_like, shape `(*size, *shape[:-1])` + logpdf : Log of the probability density function evaluated at x. + if not broadcast and not joint: + array_like, shape `(*size, *shape[:-1])` + elif broadcast and not joint. 
+ array_like, shape the broadcast of `(*size,) and `shape[:-1]` """ if broadcast: x = np.expand_dims(x, -2) @@ -425,7 +433,7 @@ def rvs(self, size=()): mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) x = np.random.randn(*size, *self.shape[:-1], self.dim) - if self.diagonal_cov: + if self.diagonal: L = np.sqrt(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim)) L = np.choose(i[..., None], np.moveaxis(L, -2, 0)) @@ -494,7 +502,7 @@ def bijector(self, x, inverse=False): np.s_[:-1], theta[..., :i] ) m = np.atleast_1d(dist.mean)[..., 0] - if dist.diagonal_cov: + if dist.diagonal: c = np.atleast_1d(dist.cov)[..., 0] else: c = np.atleast_2d(dist.cov)[..., 0, 0] diff --git a/lsbi/utils.py b/lsbi/utils.py index 2036145..24082e8 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -56,3 +56,11 @@ def bisect(f, a, b, args=(), tol=1e-8): b = np.where(fq == 0, q, b) b = np.where(fb * fq > 0, q, b) return (a + b) / 2 + + +def dediagonalise(x, diagonal, *args): + """Optionally construct a dense matrix with x on the diagonal.""" + if diagonal: + return np.atleast_1d(x)[..., None, :] * np.eye(*args) + else: + return x diff --git a/tests/test_model.py b/tests/test_model.py index 769aec5..09f1ade 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -8,7 +8,7 @@ MixtureModel, ReducedLinearModel, ReducedLinearModelUniformPrior, - _de_diagonalise, + dediagonalise, ) @@ -325,7 +325,7 @@ def test_marginal_conditional( model_2 = model.joint().marginalise(i) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + dediagonalise(model_1.cov, model_1.diagonal, model_1.dim), model_2.cov, atol=atol, ) @@ -335,7 +335,7 @@ def test_marginal_conditional( model_2 = model.joint().condition(i, theta) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + dediagonalise(model_1.cov, model_1.diagonal, model_1.dim), model_2.cov, atol=atol, ) @@ -345,7 +345,7 @@ def test_marginal_conditional( model_2 = model.joint().marginalise(i) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + dediagonalise(model_1.cov, model_1.diagonal, model_1.dim), model_2.cov, atol=atol, ) @@ -355,7 +355,7 @@ def test_marginal_conditional( model_2 = model.joint().condition(i, D) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + dediagonalise(model_1.cov, model_1.diagonal, model_1.dim), model_2.cov, atol=atol, ) @@ -659,7 +659,7 @@ def test_marginal_conditional( model_2 = model.joint().marginalise(i) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + dediagonalise(model_1.cov, model_1.diagonal, model_1.dim), model_2.cov, atol=atol, ) @@ -669,7 +669,7 @@ def test_marginal_conditional( model_2 = model.joint().condition(i, theta) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + dediagonalise(model_1.cov, model_1.diagonal, model_1.dim), model_2.cov, atol=atol, ) @@ -679,7 +679,7 @@ def test_marginal_conditional( model_2 = 
model.joint().marginalise(i) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + dediagonalise(model_1.cov, model_1.diagonal, model_1.dim), model_2.cov, atol=atol, ) @@ -689,7 +689,7 @@ def test_marginal_conditional( model_2 = model.joint().condition(i, D) assert_allclose_broadcast(model_1.mean, model_2.mean, atol=atol) assert_allclose_broadcast( - _de_diagonalise(model_1.cov, model_1.diagonal_cov, model_1.dim), + dediagonalise(model_1.cov, model_1.diagonal, model_1.dim), model_2.cov, atol=atol, ) diff --git a/tests/test_stats.py b/tests/test_stats.py index 2d86b8a..cec5643 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -20,8 +20,8 @@ for shape in shapes: for mean_shape in shapes + ["scalar"]: for cov_shape in shapes + ["scalar"]: - for diagonal_cov in [True, False]: - tests.append((dim, shape, mean_shape, cov_shape, diagonal_cov)) + for diagonal in [True, False]: + tests.append((dim, shape, mean_shape, cov_shape, diagonal)) for A_shape in shapes + ["scalar"]: for diagonal_A in [True, False]: for b_shape in shapes + ["scalar"]: @@ -36,7 +36,7 @@ shape, mean_shape, cov_shape, - diagonal_cov, + diagonal, A_shape, diagonal_A, b_shape, @@ -47,15 +47,13 @@ for p in dims: if dim < p: continue - p_tests.append( - (dim, shape, mean_shape, cov_shape, diagonal_cov, p) - ) + p_tests.append((dim, shape, mean_shape, cov_shape, diagonal, p)) def flatten(dist): """Convert a multivariate_normal to a list of scipy.stats.multivariate_normal""" mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape(-1, dist.dim) - if dist.diagonal_cov: + if dist.diagonal: cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape(-1, dist.dim) else: cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim, dist.dim)).reshape( @@ -72,7 +70,7 @@ def flatten(dist): class TestMultivariateNormal(object): cls = multivariate_normal - def random(self, dim, shape, mean_shape, cov_shape, diagonal_cov): + def random(self, dim, shape, mean_shape, cov_shape, diagonal): if mean_shape == "scalar": mean = np.random.randn() else: @@ -80,22 +78,22 @@ def random(self, dim, shape, mean_shape, cov_shape, diagonal_cov): if cov_shape == "scalar": cov = np.random.randn() ** 2 + dim - elif diagonal_cov: + elif diagonal: cov = np.random.randn(*cov_shape, dim) ** 2 + dim else: cov = np.random.randn(*cov_shape, dim, dim) cov = np.einsum("...ij,...kj->...ik", cov, cov) + dim * np.eye(dim) - dist = multivariate_normal(mean, cov, shape, dim, diagonal_cov) + dist = multivariate_normal(mean, cov, shape, dim, diagonal) assert dist.dim == dim assert np.all(dist.mean == mean) assert np.all(dist.cov == cov) return dist - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_getitem(self, dim, shape, mean_shape, cov_shape, diagonal_cov): - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) + def test_getitem(self, dim, shape, mean_shape, cov_shape, diagonal): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) if len(dist.shape) > 0: dist_2 = dist[0] @@ -120,9 +118,9 @@ def test_getitem(self, dim, shape, mean_shape, cov_shape, diagonal_cov): assert dist_2.dim == dim @pytest.mark.parametrize("size", sizes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): - dist 
= self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) + def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal, size): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) x = np.random.randn(*size, dim) logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape @@ -132,25 +130,25 @@ def test_logpdf(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) assert_allclose(logpdf, flat_logpdf) - assert_allclose(np.log(np.exp(logpdf)), dist.pdf(x)) + assert_allclose(np.exp(logpdf), dist.pdf(x)) @pytest.mark.parametrize("size", sizes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs_shape(self, dim, shape, mean_shape, cov_shape, diagonal_cov, size): - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) + def test_rvs_shape(self, dim, shape, mean_shape, cov_shape, diagonal, size): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) rvs = dist.rvs(size) assert rvs.shape == size + dist.shape + (dim,) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov): + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) + def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal): size = 100 - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) rvs = dist.rvs(size) mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( -1, dist.dim ) - if dist.diagonal_cov: + if dist.diagonal: cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( -1, dist.dim ) @@ -173,7 +171,7 @@ def test_rvs(self, dim, shape, mean_shape, cov_shape, diagonal_cov): assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > pvalue @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", + "dim, shape, mean_shape, cov_shape, diagonal, A_shape, diagonal_A, b_shape, k", A_tests, ) def test_predict( @@ -182,13 +180,13 @@ def test_predict( shape, mean_shape, cov_shape, - diagonal_cov, + diagonal, k, A_shape, diagonal_A, b_shape, ): - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) if b_shape == "scalar": b = np.random.randn() @@ -207,8 +205,8 @@ def test_predict( assert dist_2.shape == np.broadcast_shapes( dist.shape, np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1] ) - assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1] @@ -220,63 +218,59 @@ def test_predict( assert dist_2.shape == np.broadcast_shapes( dist.shape, np.shape(A)[: -2 + diagonal_A] ) - assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal] 
== np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A] ) assert dist_2.dim == k - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests - ) - def test_marginalise(self, dim, shape, mean_shape, cov_shape, diagonal_cov, p): + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal, p", p_tests) + def test_marginalise(self, dim, shape, mean_shape, cov_shape, diagonal, p): indices = np.random.choice(dim, p, replace=False) - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) dist_2 = dist.marginalise(indices) assert isinstance(dist_2, self.cls) assert dist_2.shape == dist.shape assert ( - np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] - == np.shape(dist.cov)[: -2 + diagonal_cov] + np.shape(dist_2.cov)[: -2 + dist_2.diagonal] + == np.shape(dist.cov)[: -2 + diagonal] ) assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests - ) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal, p", p_tests) def test_condition( - self, dim, shape, mean_shape, cov_shape, diagonal_cov, p, values_shape + self, dim, shape, mean_shape, cov_shape, diagonal, p, values_shape ): indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape, p) - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) dist_2 = dist.condition(indices, values) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape) assert ( - np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] - == np.shape(dist.cov)[: -2 + diagonal_cov] + np.shape(dist_2.cov)[: -2 + dist_2.diagonal] + == np.shape(dist.cov)[: -2 + diagonal] ) - if cov_shape == "scalar" or diagonal_cov: + if cov_shape == "scalar" or diagonal: assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] else: assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], - np.shape(dist.cov)[: -2 + diagonal_cov], + np.shape(dist.cov)[: -2 + diagonal], values_shape, ) assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape): - dist = self.random(dim, shape, mean_shape, cov_shape, diagonal_cov) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) + def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal, x_shape): + dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) x = np.random.rand(*x_shape, dim) y = dist.bijector(x) assert y.shape == np.broadcast_shapes(dist.shape + (dim,), x.shape) @@ -295,19 +289,19 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal_cov, x_shape class TestMixtureNormal(TestMultivariateNormal): cls = mixture_normal - def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): - dist = super().random(dim, shape, mean_shape, cov_shape, diagonal_cov) + def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): + dist = super().random(dim, 
shape, mean_shape, cov_shape, diagonal) logA = np.random.randn(*logA_shape) dist = mixture_normal( - logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal_cov + logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal ) assert np.all(dist.logA == logA) assert dist.k == dist.shape[-1] return dist - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_getitem(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) + def test_getitem(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) if len(dist.shape) > 0: dist_2 = dist[0] @@ -332,23 +326,23 @@ def test_getitem(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_c assert dist_2.dim == dim @pytest.mark.parametrize("size", sizes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) def test_logpdf( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal, size ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) x = np.random.randn(*size, dim) logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape[:-1] - assert_allclose(np.log(np.exp(logpdf)), dist.pdf(x)) + assert_allclose(np.exp(logpdf), dist.pdf(x)) logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() logA -= logsumexp(logA, axis=-1, keepdims=True) mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( -1, dist.k, dist.dim ) - if dist.diagonal_cov: + if dist.diagonal: cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( -1, dist.k, dist.dim ) @@ -374,18 +368,18 @@ def test_logpdf( assert_allclose(logpdf, flat_logpdf) @pytest.mark.parametrize("size", sizes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) def test_rvs_shape( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, size + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal, size ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) rvs = dist.rvs(size) assert rvs.shape == size + dist.shape[:-1] + (dim,) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) - def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) + def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): size = 100 - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) rvs = dist.rvs(size) logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() logA -= logsumexp(logA, axis=-1, keepdims=True) @@ -393,7 +387,7 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( -1, 
dist.k, dist.dim ) - if dist.diagonal_cov: + if dist.diagonal: cov = np.broadcast_to(dist.cov, dist.shape + (dist.dim,)).reshape( -1, dist.k, dist.dim ) @@ -418,7 +412,7 @@ def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov): assert scipy.stats.kstest(a[:, i], b[:, i]).pvalue > pvalue @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, A_shape, diagonal_A, b_shape, k", + "dim, shape, mean_shape, cov_shape, diagonal, A_shape, diagonal_A, b_shape, k", A_tests, ) def test_predict( @@ -428,13 +422,13 @@ def test_predict( logA_shape, mean_shape, cov_shape, - diagonal_cov, + diagonal, A_shape, diagonal_A, b_shape, k, ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) if b_shape == "scalar": b = np.random.randn() @@ -455,8 +449,8 @@ def test_predict( np.shape(A)[: -2 + diagonal_A], np.shape(b)[:-1], ) - assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], @@ -470,38 +464,34 @@ def test_predict( assert dist_2.shape == np.broadcast_shapes( dist.shape, np.shape(A)[: -2 + diagonal_A] ) - assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] == np.broadcast_shapes( - np.shape(dist.cov)[: -2 + diagonal_cov], np.shape(A)[: -2 + diagonal_A] + assert np.shape(dist_2.cov)[: -2 + dist_2.diagonal] == np.broadcast_shapes( + np.shape(dist.cov)[: -2 + diagonal], np.shape(A)[: -2 + diagonal_A] ) assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], np.shape(A)[: -2 + diagonal_A] ) assert dist_2.dim == k - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests - ) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal, p", p_tests) def test_marginalise( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, p + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal, p ): indices = np.random.choice(dim, p, replace=False) - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) dist_2 = dist.marginalise(indices) assert isinstance(dist_2, self.cls) assert dist_2.shape == dist.shape assert ( - np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] - == np.shape(dist.cov)[: -2 + diagonal_cov] + np.shape(dist_2.cov)[: -2 + dist_2.diagonal] + == np.shape(dist.cov)[: -2 + diagonal] ) assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] assert np.shape(dist_2.logA) == np.shape(dist.logA) assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) - @pytest.mark.parametrize( - "dim, shape, mean_shape, cov_shape, diagonal_cov, p", p_tests - ) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal, p", p_tests) def test_condition( self, dim, @@ -509,38 +499,38 @@ def test_condition( logA_shape, mean_shape, cov_shape, - diagonal_cov, + diagonal, p, values_shape, ): indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape[:-1], p) - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, logA_shape, 
mean_shape, cov_shape, diagonal) dist_2 = dist.condition(indices, values) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes(dist.shape, values_shape[:-1] + (1,)) assert ( - np.shape(dist_2.cov)[: -2 + dist_2.diagonal_cov] - == np.shape(dist.cov)[: -2 + diagonal_cov] + np.shape(dist_2.cov)[: -2 + dist_2.diagonal] + == np.shape(dist.cov)[: -2 + diagonal] ) - if cov_shape == "scalar" or diagonal_cov: + if cov_shape == "scalar" or diagonal: assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] else: assert np.shape(dist_2.mean)[:-1] == np.broadcast_shapes( np.shape(dist.mean)[:-1], - np.shape(dist.cov)[: -2 + diagonal_cov], + np.shape(dist.cov)[: -2 + diagonal], values_shape[:-1] + (1,), ) assert np.shape(dist_2.logA) == dist_2.shape assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) - @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal_cov", tests) + @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) def test_bijector( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov, x_shape + self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal, x_shape ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal_cov) + dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) x = np.random.rand(*x_shape[:-1], dim) y = dist.bijector(x) assert y.shape == np.broadcast_shapes(x.shape, dist.shape[:-1] + (dim,)) From 941e15121f8f749b8663ba26d4a5c25652190332 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 9 Feb 2024 11:42:11 +0000 Subject: [PATCH 064/117] Corrected docstrings for sphinx --- lsbi/stats.py | 46 ++++++++++++++++++++++++++++++++++++++------- tests/test_stats.py | 9 ++++++--- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index e61c52f..4c75690 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -83,8 +83,10 @@ def logpdf(self, x, broadcast=False): Returns ------- - logpdf : array_like, shape `(*size, *shape)` if broadcast is False else the consistent broadcast of size and shape. + logpdf : array_like Log of the probability density function evaluated at x. + if not broadcast: shape `(*size, *self.shape)` + else: shape broadcast of `(*size,) and `self.shape` """ x = np.array(x) if broadcast: @@ -113,8 +115,10 @@ def pdf(self, x, broadcast=False): Returns ------- - pdf : array_like, shape `(*size, *shape)` if broadcast is False else the consistent broadcast of size and shape. + pdf : array_like Probability density function evaluated at x. + if not broadcast: shape `(*size, *self.shape)` + else: shape broadcast of `(*size,) and `self.shape` """ return np.exp(self.logpdf(x, broadcast=broadcast)) @@ -390,14 +394,17 @@ def logpdf(self, x, broadcast=False, joint=False): broadcast : bool, optional, default=False If True, broadcast x across the distribution parameters. + joint : bool, optional, default=False + If True, return the joint logpdf of the mixture P(x, N) + Returns ------- - logpdf : + logpdf : array_like Log of the probability density function evaluated at x. - if not broadcast and not joint: - array_like, shape `(*size, *shape[:-1])` - elif broadcast and not joint. 
- array_like, shape the broadcast of `(*size,) and `shape[:-1]` + if not broadcast and not joint: shape `(*size, *shape[:-1])` + elif not broadcast and joint: shape `(*size, *shape)` + elif not joint: shape the broadcast of `(*size,) and `shape[:-1]` + else: shape the broadcast of `(*size,) and `shape` """ if broadcast: x = np.expand_dims(x, -2) @@ -410,6 +417,31 @@ def logpdf(self, x, broadcast=False, joint=False): return logpdf + logA return logsumexp(logpdf + logA, axis=-1) + def pdf(self, x, broadcast=False, joint=False): + """Probability density function. + + Parameters + ---------- + x : array_like, shape `(*size, dim)` + Points at which to evaluate the probability density function. + + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. + + joint : bool, optional, default=False + If True, return the joint pdf of the mixture P(x, N) + + Returns + ------- + pdf : + Probability density function evaluated at x. + if not broadcast and not joint: shape `(*size, *shape[:-1])` + elif not broadcast and joint: shape `(*size, *shape)` + elif not joint: shape the broadcast of `(*size,) and `shape[:-1]` + else: shape the broadcast of `(*size,) and `shape` + """ + return np.exp(self.logpdf(x, broadcast=broadcast, joint=joint)) + def rvs(self, size=()): """Draw random samples from the distribution. diff --git a/tests/test_stats.py b/tests/test_stats.py index cec5643..25a9886 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -213,7 +213,7 @@ def test_predict( ) assert dist_2.dim == k - dist_2 = dist.predict(A, diagonal_A=diagonal_A) + dist_2 = dist.predict(A, diagonal=diagonal_A) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes( dist.shape, np.shape(A)[: -2 + diagonal_A] @@ -296,7 +296,10 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal ) assert np.all(dist.logA == logA) - assert dist.k == dist.shape[-1] + if dist.shape: + assert dist.k == dist.shape[-1] + else: + assert dist.k == 1 return dist @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) @@ -459,7 +462,7 @@ def test_predict( ) assert dist_2.dim == k - dist_2 = dist.predict(A, diagonal_A=diagonal_A) + dist_2 = dist.predict(A, diagonal=diagonal_A) assert isinstance(dist_2, self.cls) assert dist_2.shape == np.broadcast_shapes( dist.shape, np.shape(A)[: -2 + diagonal_A] From 32acdab27200d2fe5d33720ea126875392e37dd9 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 9 Feb 2024 11:44:39 +0000 Subject: [PATCH 065/117] bump version to 0.13.0 --- README.rst | 2 +- lsbi/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 8be0d84..7b4e4de 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference ======================================= :lsbi: Linear Simulation Based Inference :Author: Will Handley & David Yallup -:Version: 0.12.0 +:Version: 0.13.0 :Homepage: https://github.com/handley-lab/lsbi :Documentation: http://lsbi.readthedocs.io/ diff --git a/lsbi/_version.py b/lsbi/_version.py index ea370a8..f23a6b3 100644 --- a/lsbi/_version.py +++ b/lsbi/_version.py @@ -1 +1 @@ -__version__ = "0.12.0" +__version__ = "0.13.0" From 0046a2526e5c0e18065a8a2f3092844620862c44 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 9 Feb 2024 12:10:31 +0000 Subject: [PATCH 066/117] Now testing model.k --- lsbi/model.py | 4 +++- tests/test_model.py | 4 ++++ 2 
files changed, 7 insertions(+), 1 deletion(-) diff --git a/lsbi/model.py b/lsbi/model.py index fbf8fff..38f46e5 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -314,7 +314,9 @@ def shape(self): @property def k(self): - """Number of mixture components of the distribution.""" + """Number of mixture components.""" + if self.shape == (): + return 1 return self.shape[-1] def likelihood(self, theta): diff --git a/tests/test_model.py b/tests/test_model.py index 09f1ade..ebbf019 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -447,6 +447,10 @@ def random( diagonal_Sigma, ) assert np.all(model.logA == logA) + if model.shape: + assert model.k == logA_shape[-1] + else: + assert model.k == 1 return model @pytest.mark.parametrize("theta_shape", shapes) From 11ba9c2ea3dce17cdacc2119ab5674d4b7d878de Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 9 Feb 2024 12:14:50 +0000 Subject: [PATCH 067/117] Now covering k --- lsbi/model.py | 4 +++- tests/test_model.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lsbi/model.py b/lsbi/model.py index fbf8fff..38f46e5 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -314,7 +314,9 @@ def shape(self): @property def k(self): - """Number of mixture components of the distribution.""" + """Number of mixture components.""" + if self.shape == (): + return 1 return self.shape[-1] def likelihood(self, theta): diff --git a/tests/test_model.py b/tests/test_model.py index 09f1ade..c57a52a 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -447,6 +447,10 @@ def random( diagonal_Sigma, ) assert np.all(model.logA == logA) + if model.shape: + assert model.k == model.shape[-1] + else: + assert model.k == 1 return model @pytest.mark.parametrize("theta_shape", shapes) From c84390e90d98f2f61b10442ec784e83a4f2b74b5 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 13 Feb 2024 07:21:14 +0000 Subject: [PATCH 068/117] Changed from np.choose to np.take_along_axis --- lsbi/stats.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 4c75690..381031a 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -460,21 +460,22 @@ def rvs(self, size=()): logA -= logsumexp(logA, axis=-1, keepdims=True) p = np.exp(logA) cump = np.cumsum(p, axis=-1) - u = np.random.rand(*size, *p.shape[:-1]) + u = np.random.rand(np.prod(size), *p.shape[:-1]) i = np.argmax(np.array(u)[..., None] < cump, axis=-1) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) - mean = np.choose(i[..., None], np.moveaxis(mean, -2, 0)) - x = np.random.randn(*size, *self.shape[:-1], self.dim) + mean = np.take_along_axis(np.moveaxis(mean, -2, 0), i[..., None], axis=0) + x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) if self.diagonal: L = np.sqrt(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim)) - L = np.choose(i[..., None], np.moveaxis(L, -2, 0)) - return mean + L * x + L = np.take_along_axis(np.moveaxis(L, -2, 0), i[..., None], axis=0) + rvs = mean + L * x else: L = cholesky(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) - L = np.choose(i[..., None, None], np.moveaxis(L, -3, 0)) - return mean + np.einsum("...ij,...j->...i", L, x) + L = np.take_along_axis(np.moveaxis(L, -3, 0), i[..., None, None], axis=0) + rvs = mean + np.einsum("...ij,...j->...i", L, x) + return rvs.reshape(*size, *self.shape[:-1], self.dim) def condition(self, indices, values): """Condition on indices with values. 
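A note on the np.choose to np.take_along_axis change above: the pattern now used in mixture_normal.rvs picks one mixture component per draw along the leading (component) axis, broadcasting the index over the trailing dimension axis. A minimal plain-numpy sketch of that selection (illustrative only, not part of any patch; one practical advantage is that take_along_axis avoids np.choose's limit on the number of choice arrays):

import numpy as np

means = np.array([[0.0, 0.0], [10.0, 10.0], [-5.0, 5.0]])  # (k=3 components, dim=2)
i = np.array([2, 0, 1, 1, 2])                              # component drawn for each of 5 samples

# move the component axis to the front, then take one slice per sample,
# with the index broadcast over the trailing dim axis
picked = np.take_along_axis(np.moveaxis(means, -2, 0), i[..., None], axis=0)
assert np.array_equal(picked, means[i])                    # shape (5, 2)
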
From 6bc0e22921472c04c64134666cfd80de78d4e580 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 14 Feb 2024 17:11:44 +0000 Subject: [PATCH 069/117] Added cleaner defaults --- lsbi/model.py | 14 +++++++------- lsbi/stats.py | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 38f46e5..43feb5d 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -60,11 +60,11 @@ class LinearModel(object): def __init__( self, - M=1, - m=0, - C=1, - mu=0, - Sigma=1, + M=1.0, + m=0.0, + C=1.0, + mu=0.0, + Sigma=1.0, shape=(), n=1, d=1, @@ -303,8 +303,8 @@ class MixtureModel(LinearModel): Number of data dimensions, defaults to automatically inferred value """ - def __init__(self, logA=1, *args): - super().__init__(*args) + def __init__(self, logA=1.0, *args, **kwargs): + super().__init__(*args, **kwargs) self.logA = logA @property diff --git a/lsbi/stats.py b/lsbi/stats.py index 381031a..44e683f 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -41,7 +41,7 @@ class multivariate_normal(object): If True, cov is interpreted as the diagonal of the covariance matrix. """ - def __init__(self, mean=0, cov=1, shape=(), dim=0, diagonal=False): + def __init__(self, mean=0.0, cov=1.0, shape=(), dim=1, diagonal=False): self.mean = mean self.cov = cov self._shape = shape @@ -142,7 +142,7 @@ def rvs(self, size=()): else: return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) - def predict(self, A=1, b=0, diagonal=False): + def predict(self, A=1.0, b=0.0, diagonal=False): """Predict the mean and covariance of a linear transformation. if: x ~ N(mu, Sigma) @@ -366,7 +366,7 @@ class mixture_normal(multivariate_normal): If True, cov is interpreted as the diagonal of the covariance matrix. """ - def __init__(self, logA=0, mean=0, cov=1, shape=(), dim=0, diagonal=False): + def __init__(self, logA=0.0, mean=0.0, cov=1.0, shape=(), dim=1, diagonal=False): self.logA = logA super().__init__(mean, cov, shape, dim, diagonal) From 31924fe547516ffc6ff9daf387b438b00cd041a7 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 14 Feb 2024 17:34:44 +0000 Subject: [PATCH 070/117] Cursed greek letter option --- lsbi/model.py | 312 +++++++++++++++++++++++++------------------------- lsbi/stats.py | 4 +- 2 files changed, 155 insertions(+), 161 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 43feb5d..dd79bcc 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -10,17 +10,17 @@ class LinearModel(object): """A multilinear model. - D|theta ~ N( m + M theta, C ) - theta ~ N( mu, Sigma ) + D|θ ~ N( m + M θ, C ) + θ ~ N( μ, Σ ) Defined by: - Parameters: theta (..., n,) - Data: D (..., d,) - Model: M (..., d, n) - Prior mean: mu (..., n,) - Prior covariance: Sigma (..., n, n) - Data mean: m (..., d,) - Data covariance: C (..., d, d) + Parameters: θ (..., n,) + Data: D (..., d,) + Model: M (..., d, n) + Prior mean: μ (..., n,) + Prior covariance: Σ (..., n, n) + Data mean: m (..., d,) + Data covariance: C (..., d, d) where the ellipses indicate arbitrary (broadcastable) additional copies. 
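# Illustrative sketch (not a line of this patch): the Greek-letter model stated
# above,
#     D|θ ~ N( m + M θ, C ),   θ ~ N( μ, Σ ),
# has the conjugate identities used later in this class for the posterior and
# evidence, written out in plain numpy (n=3 parameters, d=2 data dimensions):
import numpy as np
from numpy.linalg import inv

rng = np.random.default_rng(0)
n, d = 3, 2
M, m, C = rng.normal(size=(d, n)), np.zeros(d), np.eye(d)
mu, Sigma = np.zeros(n), np.eye(n)
D = rng.normal(size=d)

S = inv(inv(Sigma) + M.T @ inv(C) @ M)            # S = (Σ^{-1} + M'C^{-1}M)^{-1}
mu_P = mu + S @ M.T @ inv(C) @ (D - m - M @ mu)   # posterior mean of θ|D ~ N(mu_P, S)
evidence_mean = m + M @ mu                        # D ~ N( m + M μ, C + M Σ M' )
evidence_cov = C + M @ Sigma @ M.T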
@@ -40,12 +40,12 @@ class LinearModel(object): if ndim==1: data covariance with vector diagonal for all components if ndim==0: scalar * identity matrix for all components Defaults to rectangular identity matrix - mu : array_like, optional + μ : array_like, optional if ndim>=1: prior means if ndim==0: scalar * unit vector for all components Defaults to 0 for all components Prior mean, defaults to zero vector - Sigma : array_like, optional + Σ : array_like, optional if ndim>=2: prior covariances if ndim==1: prior covariance with vector diagonal for all components if ndim==0: scalar * identity matrix for all components @@ -63,14 +63,14 @@ def __init__( M=1.0, m=0.0, C=1.0, - mu=0.0, - Sigma=1.0, + μ=0.0, + Σ=1.0, shape=(), n=1, d=1, diagonal_M=False, diagonal_C=False, - diagonal_Sigma=False, + diagonal_Σ=False, ): self.M = M self.diagonal_M = diagonal_M @@ -81,11 +81,11 @@ def __init__( self.diagonal_C = diagonal_C if len(np.shape(self.C)) < 2: self.diagonal_C = True - self.mu = mu - self.Sigma = Sigma - self.diagonal_Sigma = diagonal_Sigma - if len(np.shape(self.Sigma)) < 2: - self.diagonal_Sigma = True + self.μ = μ + self.Σ = Σ + self.diagonal_Σ = diagonal_Σ + if len(np.shape(self.Σ)) < 2: + self.diagonal_Σ = True self._shape = shape self._n = n self._d = d @@ -97,8 +97,8 @@ def shape(self): np.shape(self.M)[: -2 + self.diagonal_M], np.shape(self.m)[:-1], np.shape(self.C)[: -2 + self.diagonal_C], - np.shape(self.mu)[:-1], - np.shape(self.Sigma)[: -2 + self.diagonal_Sigma], + np.shape(self.μ)[:-1], + np.shape(self.Σ)[: -2 + self.diagonal_Σ], self._shape, ) @@ -108,8 +108,8 @@ def n(self): return np.max( [ *np.shape(self.M)[len(np.shape(self.M)) - 1 + self.diagonal_M :], - *np.shape(self.Sigma)[-2 + self.diagonal_Sigma :], - *np.shape(self.mu)[-1:], + *np.shape(self.Σ)[-2 + self.diagonal_Σ :], + *np.shape(self.μ)[-1:], self._n, ] ) @@ -126,33 +126,31 @@ def d(self): ] ) - def likelihood(self, theta): - """P(D|theta) as a scipy distribution object. + def likelihood(self, θ): + """P(D|θ as a scipy distribution object. - D|theta ~ N( m + M theta, C ) - theta ~ N( mu, Sigma ) + D|θ ~ N( m + M θ, C ) + θ ~ N( μ, Σ ) Parameters ---------- - theta : array_like, shape (k, n) + θ : array_like, shape (k, n) """ - mu = self.m + np.einsum("...ja,...a->...j", self._M, theta) - return multivariate_normal(mu, self.C, self.shape, self.d, self.diagonal_C) + μ = self.m + np.einsum("...ja,...a->...j", self._M, θ) + return multivariate_normal(μ, self.C, self.shape, self.d, self.diagonal_C) def prior(self): - """P(theta) as a scipy distribution object. + """P(θ) as a scipy distribution object. - theta ~ N( mu, Sigma ) + θ ~ N( μ, Σ ) """ - return multivariate_normal( - self.mu, self.Sigma, self.shape, self.n, self.diagonal_Sigma - ) + return multivariate_normal(self.μ, self.Σ, self.shape, self.n, self.diagonal_Σ) def posterior(self, D): - """P(theta|D) as a scipy distribution object. + """P(θ|D) as a scipy distribution object. 
- theta|D ~ N( mu + S M'C^{-1}(D - m - M mu), S ) - S = (Sigma^{-1} + M'C^{-1}M)^{-1} + θ|D ~ N( μ + S M'C^{-1}(D - m - M μ), S ) + S = (Σ^{-1} + M'C^{-1}M)^{-1} Parameters ---------- @@ -161,82 +159,82 @@ def posterior(self, D): values = ( D - self.m - - np.einsum("...ja,...a->...j", self._M, self.mu * np.ones(self.n)) + - np.einsum("...ja,...a->...j", self._M, self.μ * np.ones(self.n)) ) - diagonal_Sigma = self.diagonal_C and self.diagonal_Sigma and self.diagonal_M + diagonal_Σ = self.diagonal_C and self.diagonal_Σ and self.diagonal_M - if diagonal_Sigma: + if diagonal_Σ: dim = min(self.n, self.d) shape = np.broadcast_shapes(self.shape, values.shape[:-1]) C = np.atleast_1d(self.C)[..., :dim] M = np.atleast_1d(self.M)[..., :dim] - Sigma = np.broadcast_to(self.Sigma, shape + (self.n,)).copy() - Sigma[..., :dim] = 1 / (1 / Sigma[..., :dim] + M**2 / C) + Σ = np.broadcast_to(self.Σ, shape + (self.n,)).copy() + Σ[..., :dim] = 1 / (1 / Σ[..., :dim] + M**2 / C) - mu = np.broadcast_to(self.mu, shape + (self.n,)).copy() - mu[..., :dim] = mu[..., :dim] + Sigma[..., :dim] * M / C * values[..., :dim] + μ = np.broadcast_to(self.μ, shape + (self.n,)).copy() + μ[..., :dim] = μ[..., :dim] + Σ[..., :dim] * M / C * values[..., :dim] else: if self.diagonal_C: invC = np.eye(self.d) / np.atleast_1d(self.C)[..., None, :] else: invC = inv(self.C) - if self.diagonal_Sigma: - invSigma = np.eye(self.n) / np.atleast_1d(self.Sigma)[..., None, :] + if self.diagonal_Σ: + invΣ = np.eye(self.n) / np.atleast_1d(self.Σ)[..., None, :] else: - invSigma = inv(self.Sigma) + invΣ = inv(self.Σ) - Sigma = inv( - invSigma + np.einsum("...aj,...ab,...bk->...jk", self._M, invC, self._M) + Σ = inv( + invΣ + np.einsum("...aj,...ab,...bk->...jk", self._M, invC, self._M) ) - mu = self.mu + np.einsum( - "...ja,...ba,...bc,...c->...j", Sigma, self._M, invC, values + μ = self.μ + np.einsum( + "...ja,...ba,...bc,...c->...j", Σ, self._M, invC, values ) - return multivariate_normal(mu, Sigma, self.shape, self.n, diagonal_Sigma) + return multivariate_normal(μ, Σ, self.shape, self.n, diagonal_Σ) def evidence(self): """P(D) as a scipy distribution object. - D ~ N( m + M mu, C + M Sigma M' ) + D ~ N( m + M μ, C + M Σ M' ) """ - mu = self.m + np.einsum("...ja,...a->...j", self._M, self.mu * np.ones(self.n)) - diagonal_Sigma = self.diagonal_C and self.diagonal_Sigma and self.diagonal_M + μ = self.m + np.einsum("...ja,...a->...j", self._M, self.μ * np.ones(self.n)) + diagonal_Σ = self.diagonal_C and self.diagonal_Σ and self.diagonal_M - if diagonal_Sigma: + if diagonal_Σ: dim = min(self.n, self.d) M = np.atleast_1d(self.M)[..., :dim] - S = np.atleast_1d(self.Sigma)[..., :dim] - Sigma = np.broadcast_to(self.C, self.shape + (self.d,)).copy() - Sigma[..., :dim] += S * M**2 + S = np.atleast_1d(self.Σ)[..., :dim] + Σ = np.broadcast_to(self.C, self.shape + (self.d,)).copy() + Σ[..., :dim] += S * M**2 else: - Sigma = self._C + np.einsum( - "...ja,...ab,...kb->...jk", self._M, self._Sigma, self._M + Σ = self._C + np.einsum( + "...ja,...ab,...kb->...jk", self._M, self._Σ, self._M ) - return multivariate_normal(mu, Sigma, self.shape, self.d, diagonal_Sigma) + return multivariate_normal(μ, Σ, self.shape, self.d, diagonal_Σ) def joint(self): - """P(theta, D) as a scipy distribution object. + """P(θ, D) as a scipy distribution object. 
- [theta] ~ N( [ mu ] [ Sigma Sigma M' ] ) - [ D ] ( [m + M mu] , [M Sigma C + M Sigma M'] ) + [θ] ~ N( [ μ ] [ Σ Σ M' ] ) + [D] ( [m + M μ] , [M Σ C + M Σ M'] ) """ evidence = self.evidence() prior = self.prior() b = np.broadcast_to(prior.mean, self.shape + (self.n,)) a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) - mu = np.block([b, a]) + μ = np.block([b, a]) A = dediagonalise(prior.cov, prior.diagonal, self.n) A = np.broadcast_to(A, self.shape + (self.n, self.n)) D = dediagonalise(evidence.cov, evidence.diagonal, self.d) D = np.broadcast_to(D, self.shape + (self.d, self.d)) - C = np.einsum("...ja,...al->...jl", self._M, self._Sigma) + C = np.einsum("...ja,...al->...jl", self._M, self._Σ) C = np.broadcast_to(C, self.shape + (self.d, self.n)) B = np.moveaxis(C, -1, -2) - Sigma = np.block([[A, B], [C, D]]) - return multivariate_normal(mu, Sigma, self.shape, self.n + self.d) + Σ = np.block([[A, B], [C, D]]) + return multivariate_normal(μ, Σ, self.shape, self.n + self.d) @property def _M(self): @@ -247,22 +245,22 @@ def _C(self): return dediagonalise(self.C, self.diagonal_C, self.d) @property - def _Sigma(self): - return dediagonalise(self.Sigma, self.diagonal_Sigma, self.n) + def _Σ(self): + return dediagonalise(self.Σ, self.diagonal_Σ, self.n) class MixtureModel(LinearModel): """A linear mixture model. - D|theta, A ~ N( m + M theta, C ) - theta|A ~ N( mu, Sigma ) + D|θ, A ~ N( m + M θ, C ) + θ|A ~ N( μ, Σ ) A ~ categorical( exp(logA) ) Defined by: - Parameters: theta (..., n,) + Parameters: θ (..., n,) Data: D (..., d,) - Prior means: mu (..., k, n) - Prior covariances: Sigma (..., k, n, n) + Prior means: μ (..., k, n) + Prior covariances: Σ (..., k, n, n) Data means: m (..., k, d) Data covariances: C (..., k, d, d) log mixture weights: logA (..., k,) @@ -283,12 +281,12 @@ class MixtureModel(LinearModel): if ndim==1: data covariance with vector diagonal for all components if scalar: scalar * identity matrix for all components Defaults to k copies of identity matrices - mu : array_like, optional + μ : array_like, optional if ndim>=1: prior means if scalar: scalar * unit vector for all components Defaults to 0 for all components Prior mean, defaults to zero vector - Sigma : array_like, optional + Σ : array_like, optional if ndim>=2: prior covariances if ndim==1: prior covariance with vector diagonal for all components if scalar: scalar * identity matrix for all components @@ -319,26 +317,26 @@ def k(self): return 1 return self.shape[-1] - def likelihood(self, theta): - """P(D|theta) as a scipy distribution object. + def likelihood(self, θ): + """P(D|θ) as a scipy distribution object. - D|theta,A ~ N( m + M theta, C ) - theta|A ~ N( mu, Sigma ) + D|θ,A ~ N( m + M θ, C ) + θ|A ~ N( μ, Σ ) A ~ categorical(exp(logA)) Parameters ---------- - theta : array_like, shape (n,) + θ : array_like, shape (n,) """ - dist = super().likelihood(np.expand_dims(theta, -2)) + dist = super().likelihood(np.expand_dims(θ, -2)) dist.__class__ = mixture_normal - dist.logA = self.prior().logpdf(theta, broadcast=True, joint=True) + dist.logA = self.prior().logpdf(θ, broadcast=True, joint=True) return dist def prior(self): - """P(theta) as a scipy distribution object. + """P(θ) as a scipy distribution object. - theta|A ~ N( mu, Sigma ) + θ|A ~ N( μ, Σ ) A ~ categorical(exp(logA)) """ dist = super().prior() @@ -347,12 +345,12 @@ def prior(self): return dist def posterior(self, D): - """P(theta|D) as a scipy distribution object. + """P(θ|D) as a scipy distribution object. 
- theta|D, A ~ N( mu + S M'C^{-1}(D - m - M mu), S ) - D|A ~ N( m + M mu, C + M Sigma M' ) + θ|D, A ~ N( μ + S M'C^{-1}(D - m - M μ), S ) + D|A ~ N( m + M μ, C + M Σ M' ) A ~ categorical(exp(logA)) - S = (Sigma^{-1} + M'C^{-1}M)^{-1} + S = (Σ^{-1} + M'C^{-1}M)^{-1} Parameters ---------- @@ -366,7 +364,7 @@ def posterior(self, D): def evidence(self): """P(D) as a scipy distribution object. - D|A ~ N( m + M mu, C + M Sigma M' ) + D|A ~ N( m + M μ, C + M Σ M' ) A ~ categorical(exp(logA)) """ dist = super().evidence() @@ -375,10 +373,10 @@ def evidence(self): return dist def joint(self): - """P(D, theta) as a scipy distribution object. + """P(D, θ) as a scipy distribution object. - [theta] | A ~ N( [ mu ] [ Sigma Sigma M' ] ) - [ D ] | ( [m + M mu] , [M Sigma C + M Sigma M'] ) + [θ] | A ~ N( [ μ ] [ Σ Σ M' ] ) + [ D ] | ( [m + M μ] , [M Σ C + M Σ M'] ) A ~ categorical(exp(logA)) """ @@ -395,83 +393,79 @@ class ReducedLinearModel(object): clear/efficient to phrase it in terms of a parameter covariance, parameter mean and peak value: - logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) + logL(θ) = logLmax - (θ - μ_L)' Σ_L^{-1} (θ - μ_L) We can link this to a data-based model with the relations: - Sigma_L = (M' C^{-1} M)^{-1} - mu_L = Sigma_L M' C^{-1} (D-m) + Σ_L = (M' C^{-1} M)^{-1} + μ_L = Σ_L M' C^{-1} (D-m) logLmax = - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 Parameters ---------- - mu_L : array_like + μ_L : array_like Likelihood peak - Sigma_L : array_like + Σ_L : array_like Likelihood covariance logLmax : float, optional Likelihood maximum, defaults to zero - mu_pi : array_like, optional + μ_pi : array_like, optional Prior mean, defaults to zero vector - Sigma_pi : array_like, optional + Σ_pi : array_like, optional Prior covariance, defaults to identity matrix """ def __init__(self, *args, **kwargs): - self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) - self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L", None)) + self.μ_L = np.atleast_1d(kwargs.pop("μ_L")) + self.Σ_L = np.atleast_2d(kwargs.pop("Σ_L", None)) self.logLmax = kwargs.pop("logLmax", 0) - self.mu_pi = np.atleast_1d(kwargs.pop("mu_pi", np.zeros_like(self.mu_L))) - self.Sigma_pi = np.atleast_2d(kwargs.pop("Sigma_pi", np.eye(len(self.mu_pi)))) - self.Sigma_P = inv(inv(self.Sigma_pi) + inv(self.Sigma_L)) - self.mu_P = self.Sigma_P @ ( - solve(self.Sigma_pi, self.mu_pi) + solve(self.Sigma_L, self.mu_L) - ) + self.μ_pi = np.atleast_1d(kwargs.pop("μ_pi", np.zeros_like(self.μ_L))) + self.Σ_pi = np.atleast_2d(kwargs.pop("Σ_pi", np.eye(len(self.μ_pi)))) + self.Σ_P = inv(inv(self.Σ_pi) + inv(self.Σ_L)) + self.μ_P = self.Σ_P @ (solve(self.Σ_pi, self.μ_pi) + solve(self.Σ_L, self.μ_L)) def prior(self): - """P(theta) as a scipy distribution object.""" - return multivariate_normal(self.mu_pi, self.Sigma_pi) + """P(θ) as a scipy distribution object.""" + return multivariate_normal(self.μ_pi, self.Σ_pi) def posterior(self): - """P(theta|D) as a scipy distribution object.""" - return multivariate_normal(self.mu_P, self.Sigma_P) + """P(θ|D) as a scipy distribution object.""" + return multivariate_normal(self.μ_P, self.Σ_P) - def logpi(self, theta): - """P(theta) as a scalar.""" - return self.prior().logpdf(theta) + def logpi(self, θ): + """P(θ) as a scalar.""" + return self.prior().logpdf(θ) - def logP(self, theta): - """P(theta|D) as a scalar.""" - return self.posterior().logpdf(theta) + def logP(self, θ): + """P(θ|D) as a scalar.""" + return self.posterior().logpdf(θ) - def logL(self, theta): - 
"""P(D|theta) as a scalar.""" + def logL(self, θ): + """P(D|θ) as a scalar.""" return ( self.logLmax - + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(theta) - + logdet(2 * np.pi * self.Sigma_L) / 2 + + multivariate_normal(self.μ_L, self.Σ_L).logpdf(θ) + + logdet(2 * np.pi * self.Σ_L) / 2 ) def logZ(self): """P(D) as a scalar.""" return ( self.logLmax - + logdet(self.Sigma_P) / 2 - - logdet(self.Sigma_pi) / 2 - - (self.mu_P - self.mu_pi) - @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) - / 2 - - (self.mu_P - self.mu_L) @ solve(self.Sigma_L, self.mu_P - self.mu_L) / 2 + + logdet(self.Σ_P) / 2 + - logdet(self.Σ_pi) / 2 + - (self.μ_P - self.μ_pi) @ solve(self.Σ_pi, self.μ_P - self.μ_pi) / 2 + - (self.μ_P - self.μ_L) @ solve(self.Σ_L, self.μ_P - self.μ_L) / 2 ) def DKL(self): - """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence.""" + """D_KL(P(θ|D)||P(θ)) the Kullback-Leibler divergence.""" return ( - logdet(self.Sigma_pi) - - logdet(self.Sigma_P) - + np.trace(inv(self.Sigma_pi) @ self.Sigma_P - 1) - + (self.mu_P - self.mu_pi) @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) + logdet(self.Σ_pi) + - logdet(self.Σ_P) + + np.trace(inv(self.Σ_pi) @ self.Σ_P - 1) + + (self.μ_P - self.μ_pi) @ solve(self.Σ_pi, self.μ_P - self.μ_pi) ) / 2 @@ -480,22 +474,22 @@ class ReducedLinearModelUniformPrior(object): Gaussian likelihood in the parameters - logL(theta) = logLmax - (theta - mu_L)' Sigma_L^{-1} (theta - mu_L) + logL(θ) = logLmax - (θ - μ_L)' Σ_L^{-1} (θ - μ_L) Uniform prior We can link this to a data-based model with the relations: - Sigma_L = (M' C^{-1} M)^{-1} - mu_L = Sigma_L M' C^{-1} (D-m) + Σ_L = (M' C^{-1} M)^{-1} + μ_L = Σ_L M' C^{-1} (D-m) logLmax = -log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 Parameters ---------- - mu_L : array_like + μ_L : array_like Likelihood peak - Sigma_L : array_like + Σ_L : array_like Likelihood covariance logLmax : float, optional Likelihood maximum, defaults to zero @@ -504,37 +498,37 @@ class ReducedLinearModelUniformPrior(object): """ def __init__(self, *args, **kwargs): - self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) - self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L")) + self.μ_L = np.atleast_1d(kwargs.pop("μ_L")) + self.Σ_L = np.atleast_2d(kwargs.pop("Σ_L")) self.logLmax = kwargs.pop("logLmax", 0) self.logV = kwargs.pop("logV", 0) - self.Sigma_P = self.Sigma_L - self.mu_P = self.mu_L + self.Σ_P = self.Σ_L + self.μ_P = self.μ_L def posterior(self): - """P(theta|D) as a scipy distribution object.""" - return multivariate_normal(self.mu_P, self.Sigma_P) + """P(θ|D) as a scipy distribution object.""" + return multivariate_normal(self.μ_P, self.Σ_P) - def logpi(self, theta): - """P(theta) as a scalar.""" + def logpi(self, θ): + """P(θ) as a scalar.""" return -self.logV - def logP(self, theta): - """P(theta|D) as a scalar.""" - return self.posterior().logpdf(theta) + def logP(self, θ): + """P(θ|D) as a scalar.""" + return self.posterior().logpdf(θ) - def logL(self, theta): - """P(D|theta) as a scalar.""" + def logL(self, θ): + """P(D|θ) as a scalar.""" return ( self.logLmax - + logdet(2 * np.pi * self.Sigma_L) / 2 - + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(theta) + + logdet(2 * np.pi * self.Σ_L) / 2 + + multivariate_normal(self.μ_L, self.Σ_L).logpdf(θ) ) def logZ(self): """P(D) as a scalar.""" - return self.logLmax + logdet(2 * np.pi * self.Sigma_P) / 2 - self.logV + return self.logLmax + logdet(2 * np.pi * self.Σ_P) / 2 - self.logV def DKL(self): - """D_KL(P(theta|D)||P(theta)) the Kullback-Leibler 
divergence.""" - return self.logV - logdet(2 * np.pi * np.e * self.Sigma_P) / 2 + """D_KL(P(θ|D)||P(θ)) the Kullback-Leibler divergence.""" + return self.logV - logdet(2 * np.pi * np.e * self.Σ_P) / 2 diff --git a/lsbi/stats.py b/lsbi/stats.py index 44e683f..3ff8060 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -145,8 +145,8 @@ def rvs(self, size=()): def predict(self, A=1.0, b=0.0, diagonal=False): """Predict the mean and covariance of a linear transformation. - if: x ~ N(mu, Sigma) - then: Ax + b ~ N(A mu + b, A Sigma A^T) + if: x ~ N(μ, Σ) + then: Ax + b ~ N(A μ + b, A Σ A^T) Parameters ---------- From deedd4e0df481be6014c2645013d78e41cdbbb70 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 14 Feb 2024 18:54:34 +0000 Subject: [PATCH 071/117] Added in aliases and logw --- lsbi/model.py | 130 ++++++++++++++++++++++++++------------------ lsbi/stats.py | 28 +++++----- tests/test_model.py | 74 ++++++++++++++----------- tests/test_stats.py | 58 ++++++++++---------- 4 files changed, 161 insertions(+), 129 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index dd79bcc..59381c1 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -40,12 +40,12 @@ class LinearModel(object): if ndim==1: data covariance with vector diagonal for all components if ndim==0: scalar * identity matrix for all components Defaults to rectangular identity matrix - μ : array_like, optional + μ : (or mu) array_like, optional if ndim>=1: prior means if ndim==0: scalar * unit vector for all components Defaults to 0 for all components Prior mean, defaults to zero vector - Σ : array_like, optional + Σ : (or Sigma) array_like, optional if ndim>=2: prior covariances if ndim==1: prior covariance with vector diagonal for all components if ndim==0: scalar * identity matrix for all components @@ -58,37 +58,54 @@ class LinearModel(object): Number of mixture components, defaults to automatically inferred value """ - def __init__( - self, - M=1.0, - m=0.0, - C=1.0, - μ=0.0, - Σ=1.0, - shape=(), - n=1, - d=1, - diagonal_M=False, - diagonal_C=False, - diagonal_Σ=False, - ): - self.M = M - self.diagonal_M = diagonal_M + def __init__(self, *args, **kwargs): + self.M = kwargs.pop("M", 1.0) + self.diagonal_M = kwargs.pop("diagonal_M", False) if len(np.shape(self.M)) < 2: self.diagonal_M = True - self.m = m - self.C = C - self.diagonal_C = diagonal_C + self.m = kwargs.pop("m", 0.0) + self.C = kwargs.pop("C", 1.0) + self.diagonal_C = kwargs.pop("diagonal_C", False) if len(np.shape(self.C)) < 2: self.diagonal_C = True - self.μ = μ - self.Σ = Σ - self.diagonal_Σ = diagonal_Σ + self.μ = kwargs.pop("μ", 0.0) + self.μ = kwargs.pop("mu", self.μ) + self.Σ = kwargs.pop("Σ", 1.0) + self.Σ = kwargs.pop("Sigma", self.Σ) + self.diagonal_Σ = kwargs.pop("diagonal_Σ", False) + self.diagonal_Σ = kwargs.pop("diagonal_Sigma", self.diagonal_Σ) if len(np.shape(self.Σ)) < 2: self.diagonal_Σ = True - self._shape = shape - self._n = n - self._d = d + self._shape = kwargs.pop("shape", ()) + self._n = kwargs.pop("n", 1) + self._d = kwargs.pop("d", 1) + + if kwargs: + raise ValueError(f"Unrecognised arguments: {kwargs}") + + @property + def Sigma(self): # noqa: D102 + return self.Σ + + @Sigma.setter + def Sigma(self, value): + self.Σ = value + + @property + def mu(self): # noqa: D102 + return self.μ + + @mu.setter + def mu(self, value): + self.μ = value + + @property + def diagonal_Sigma(self): # noqa: D102 + return self.diagonal_Σ + + @diagonal_Sigma.setter + def diagonal_Sigma(self, value): + self.diagonal_Σ = value @property def shape(self): @@ -126,6 
+143,17 @@ def d(self): ] ) + def model(self, θ): + """Model matrix M(θ) for a given parameter vector. + + M(θ) = m + M θ + + Parameters + ---------- + θ : array_like, shape (..., n,) + """ + return self.m + np.einsum("...ja,...a->...j", self._M, θ * np.ones(self.n)) + def likelihood(self, θ): """P(D|θ as a scipy distribution object. @@ -136,7 +164,7 @@ def likelihood(self, θ): ---------- θ : array_like, shape (k, n) """ - μ = self.m + np.einsum("...ja,...a->...j", self._M, θ) + μ = self.model(θ) return multivariate_normal(μ, self.C, self.shape, self.d, self.diagonal_C) def prior(self): @@ -156,11 +184,7 @@ def posterior(self, D): ---------- D : array_like, shape (d,) """ - values = ( - D - - self.m - - np.einsum("...ja,...a->...j", self._M, self.μ * np.ones(self.n)) - ) + values = D - self.model(self.μ) diagonal_Σ = self.diagonal_C and self.diagonal_Σ and self.diagonal_M @@ -199,9 +223,7 @@ def evidence(self): D ~ N( m + M μ, C + M Σ M' ) """ - μ = self.m + np.einsum("...ja,...a->...j", self._M, self.μ * np.ones(self.n)) diagonal_Σ = self.diagonal_C and self.diagonal_Σ and self.diagonal_M - if diagonal_Σ: dim = min(self.n, self.d) M = np.atleast_1d(self.M)[..., :dim] @@ -212,7 +234,7 @@ def evidence(self): Σ = self._C + np.einsum( "...ja,...ab,...kb->...jk", self._M, self._Σ, self._M ) - + μ = self.model(self.μ) return multivariate_normal(μ, Σ, self.shape, self.d, diagonal_Σ) def joint(self): @@ -254,7 +276,7 @@ class MixtureModel(LinearModel): D|θ, A ~ N( m + M θ, C ) θ|A ~ N( μ, Σ ) - A ~ categorical( exp(logA) ) + A ~ categorical( exp(logw) ) Defined by: Parameters: θ (..., n,) @@ -263,7 +285,7 @@ class MixtureModel(LinearModel): Prior covariances: Σ (..., k, n, n) Data means: m (..., k, d) Data covariances: C (..., k, d, d) - log mixture weights: logA (..., k,) + log mixture weights: logw (..., k,) Parameters ---------- @@ -291,7 +313,7 @@ class MixtureModel(LinearModel): if ndim==1: prior covariance with vector diagonal for all components if scalar: scalar * identity matrix for all components Defaults to k copies of identity matrices - logA : array_like, optional + logw : array_like, optional if ndim>=1: log mixture weights if scalar: scalar * unit vector Defaults to uniform weights @@ -301,14 +323,14 @@ class MixtureModel(LinearModel): Number of data dimensions, defaults to automatically inferred value """ - def __init__(self, logA=1.0, *args, **kwargs): + def __init__(self, *args, **kwargs): + self.logw = kwargs.pop("logw", 0.0) super().__init__(*args, **kwargs) - self.logA = logA @property def shape(self): """Shape of the distribution.""" - return np.broadcast_shapes(np.shape(self.logA), super().shape) + return np.broadcast_shapes(np.shape(self.logw), super().shape) @property def k(self): @@ -320,9 +342,9 @@ def k(self): def likelihood(self, θ): """P(D|θ) as a scipy distribution object. - D|θ,A ~ N( m + M θ, C ) - θ|A ~ N( μ, Σ ) - A ~ categorical(exp(logA)) + D|θ,w ~ N( m + M θ, C ) + θ|w ~ N( μ, Σ ) + w ~ categorical(exp(logw)) Parameters ---------- @@ -330,18 +352,18 @@ def likelihood(self, θ): """ dist = super().likelihood(np.expand_dims(θ, -2)) dist.__class__ = mixture_normal - dist.logA = self.prior().logpdf(θ, broadcast=True, joint=True) + dist.logw = self.prior().logpdf(θ, broadcast=True, joint=True) return dist def prior(self): """P(θ) as a scipy distribution object. 
θ|A ~ N( μ, Σ ) - A ~ categorical(exp(logA)) + A ~ categorical(exp(logw)) """ dist = super().prior() dist.__class__ = mixture_normal - dist.logA = self.logA + dist.logw = self.logw return dist def posterior(self, D): @@ -349,7 +371,7 @@ def posterior(self, D): θ|D, A ~ N( μ + S M'C^{-1}(D - m - M μ), S ) D|A ~ N( m + M μ, C + M Σ M' ) - A ~ categorical(exp(logA)) + A ~ categorical(exp(logw)) S = (Σ^{-1} + M'C^{-1}M)^{-1} Parameters @@ -358,18 +380,18 @@ def posterior(self, D): """ dist = super().posterior(np.expand_dims(D, -2)) dist.__class__ = mixture_normal - dist.logA = self.evidence().logpdf(D, broadcast=True, joint=True) + dist.logw = self.evidence().logpdf(D, broadcast=True, joint=True) return dist def evidence(self): """P(D) as a scipy distribution object. D|A ~ N( m + M μ, C + M Σ M' ) - A ~ categorical(exp(logA)) + A ~ categorical(exp(logw)) """ dist = super().evidence() dist.__class__ = mixture_normal - dist.logA = self.logA + dist.logw = self.logw return dist def joint(self): @@ -378,11 +400,11 @@ def joint(self): [θ] | A ~ N( [ μ ] [ Σ Σ M' ] ) [ D ] | ( [m + M μ] , [M Σ C + M Σ M'] ) - A ~ categorical(exp(logA)) + A ~ categorical(exp(logw)) """ dist = super().joint() dist.__class__ = mixture_normal - dist.logA = self.logA + dist.logw = self.logw return dist diff --git a/lsbi/stats.py b/lsbi/stats.py index 3ff8060..96a283a 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -351,7 +351,7 @@ class mixture_normal(multivariate_normal): cov: array_like, shape `(..., n, dim, dim)` Covariance matrix of each component. - logA: array_like, shape `(..., n)` + logw: array_like, shape `(..., n)` Log of the mixing weights. shape: tuple, optional, default=() @@ -366,14 +366,14 @@ class mixture_normal(multivariate_normal): If True, cov is interpreted as the diagonal of the covariance matrix. """ - def __init__(self, logA=0.0, mean=0.0, cov=1.0, shape=(), dim=1, diagonal=False): - self.logA = logA + def __init__(self, logw=0.0, mean=0.0, cov=1.0, shape=(), dim=1, diagonal=False): + self.logw = logw super().__init__(mean, cov, shape, dim, diagonal) @property def shape(self): """Shape of the distribution.""" - return np.broadcast_shapes(np.shape(self.logA), super().shape) + return np.broadcast_shapes(np.shape(self.logw), super().shape) @property def k(self): @@ -411,11 +411,11 @@ def logpdf(self, x, broadcast=False, joint=False): logpdf = super().logpdf(x, broadcast=broadcast) if self.shape == (): return logpdf - logA = np.broadcast_to(self.logA, self.shape).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) + logw = np.broadcast_to(self.logw, self.shape).copy() + logw -= logsumexp(logw, axis=-1, keepdims=True) if joint: - return logpdf + logA - return logsumexp(logpdf + logA, axis=-1) + return logpdf + logw + return logsumexp(logpdf + logw, axis=-1) def pdf(self, x, broadcast=False, joint=False): """Probability density function. 
@@ -456,9 +456,9 @@ def rvs(self, size=()): if self.shape == (): return super().rvs(size=size) size = np.atleast_1d(np.array(size, dtype=int)) - logA = np.broadcast_to(self.logA, self.shape).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) - p = np.exp(logA) + logw = np.broadcast_to(self.logw, self.shape).copy() + logw -= logsumexp(logw, axis=-1, keepdims=True) + p = np.exp(logw) cump = np.cumsum(p, axis=-1) u = np.random.rand(np.prod(size), *p.shape[:-1]) i = np.argmax(np.array(u)[..., None] < cump, axis=-1) @@ -496,7 +496,7 @@ def condition(self, indices, values): dist = super().condition(indices, np.expand_dims(values, -2)) dist.__class__ = mixture_normal marg = self.marginalise(self._bar(indices)) - dist.logA = marg.logpdf(values, broadcast=True, joint=True) + dist.logw = marg.logpdf(values, broadcast=True, joint=True) return dist def bijector(self, x, inverse=False): @@ -539,7 +539,7 @@ def bijector(self, x, inverse=False): c = np.atleast_1d(dist.cov)[..., 0] else: c = np.atleast_2d(dist.cov)[..., 0, 0] - A = np.exp(dist.logA - logsumexp(dist.logA, axis=-1)[..., None]) + A = np.exp(dist.logw - logsumexp(dist.logw, axis=-1)[..., None]) m = np.broadcast_to(m, dist.shape) def f(t): @@ -563,5 +563,5 @@ def f(t): def __getitem__(self, arg): # noqa: D105 dist = super().__getitem__(arg) dist.__class__ = mixture_normal - dist.logA = np.broadcast_to(self.logA, self.shape)[arg] + dist.logw = np.broadcast_to(self.logw, self.shape)[arg] return dist diff --git a/tests/test_model.py b/tests/test_model.py index c57a52a..7807820 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -110,7 +110,17 @@ def random( Sigma = np.einsum("...ij,...kj->...ik", Sigma, Sigma) + n * np.eye(n) model = LinearModel( - M, m, C, mu, Sigma, shape, n, d, diagonal_M, diagonal_C, diagonal_Sigma + M=M, + m=m, + C=C, + mu=mu, + Sigma=Sigma, + shape=shape, + n=n, + d=d, + diagonal_M=diagonal_M, + diagonal_C=diagonal_C, + diagonal_Sigma=diagonal_Sigma, ) assert model.d == d assert model.n == n @@ -401,11 +411,11 @@ def test_bayes_theorem( ) -@pytest.mark.parametrize("logA_shape", shapes) +@pytest.mark.parametrize("logw_shape", shapes) class TestMixtureModel(TestLinearModel): def random( self, - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -431,22 +441,22 @@ def random( n, d, ) - logA = np.random.randn(*logA_shape) + logw = np.random.randn(*logw_shape) model = MixtureModel( - logA, - model.M, - model.m, - model.C, - model.mu, - model.Sigma, - shape, - n, - d, - diagonal_M, - diagonal_C, - diagonal_Sigma, - ) - assert np.all(model.logA == logA) + logw=logw, + M=model.M, + m=model.m, + C=model.C, + mu=model.mu, + Sigma=model.Sigma, + shape=shape, + n=n, + d=d, + diagonal_M=diagonal_M, + diagonal_C=diagonal_C, + diagonal_Sigma=diagonal_Sigma, + ) + assert np.all(model.logw == logw) if model.shape: assert model.k == model.shape[-1] else: @@ -457,7 +467,7 @@ def random( def test_likelihood( self, theta_shape, - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -471,7 +481,7 @@ def test_likelihood( d, ): model = self.random( - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -492,7 +502,7 @@ def test_likelihood( def test_prior( self, - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -506,7 +516,7 @@ def test_prior( d, ): model = self.random( - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -527,7 +537,7 @@ def test_prior( def test_posterior( self, D_shape, - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -541,7 +551,7 @@ def test_posterior( d, ): 
model = self.random( - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -562,7 +572,7 @@ def test_posterior( def test_evidence( self, - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -576,7 +586,7 @@ def test_evidence( d, ): model = self.random( - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -595,7 +605,7 @@ def test_evidence( def test_joint( self, - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -609,7 +619,7 @@ def test_joint( d, ): model = self.random( - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -628,7 +638,7 @@ def test_joint( def test_marginal_conditional( self, - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -642,7 +652,7 @@ def test_marginal_conditional( d, ): model = self.random( - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -700,7 +710,7 @@ def test_marginal_conditional( def test_bayes_theorem( self, - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, @@ -716,7 +726,7 @@ def test_bayes_theorem( atol = 1e-5 model = self.random( - logA_shape, + logw_shape, M_shape, diagonal_M, m_shape, diff --git a/tests/test_stats.py b/tests/test_stats.py index 25a9886..0a99ba3 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -285,17 +285,17 @@ def test_bijector(self, dim, shape, mean_shape, cov_shape, diagonal, x_shape): assert_allclose(np.broadcast_to(x, y.shape), dist.bijector(y, inverse=True)) -@pytest.mark.parametrize("logA_shape", shapes) +@pytest.mark.parametrize("logw_shape", shapes) class TestMixtureNormal(TestMultivariateNormal): cls = mixture_normal - def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): + def random(self, dim, shape, logw_shape, mean_shape, cov_shape, diagonal): dist = super().random(dim, shape, mean_shape, cov_shape, diagonal) - logA = np.random.randn(*logA_shape) + logw = np.random.randn(*logw_shape) dist = mixture_normal( - logA, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal + logw, dist.mean, dist.cov, dist.shape, dist.dim, dist.diagonal ) - assert np.all(dist.logA == logA) + assert np.all(dist.logw == logw) if dist.shape: assert dist.k == dist.shape[-1] else: @@ -303,8 +303,8 @@ def random(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): return dist @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) - def test_getitem(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) + def test_getitem(self, dim, shape, logw_shape, mean_shape, cov_shape, diagonal): + dist = self.random(dim, shape, logw_shape, mean_shape, cov_shape, diagonal) if len(dist.shape) > 0: dist_2 = dist[0] @@ -331,17 +331,17 @@ def test_getitem(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) def test_logpdf( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal, size + self, dim, shape, logw_shape, mean_shape, cov_shape, diagonal, size ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) + dist = self.random(dim, shape, logw_shape, mean_shape, cov_shape, diagonal) x = np.random.randn(*size, dim) logpdf = dist.logpdf(x) assert logpdf.shape == size + dist.shape[:-1] assert_allclose(np.exp(logpdf), dist.pdf(x)) - logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) + logw = 
np.broadcast_to(dist.logw, dist.shape).reshape(-1, dist.k).copy() + logw -= logsumexp(logw, axis=-1, keepdims=True) mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( -1, dist.k, dist.dim ) @@ -364,7 +364,7 @@ def test_logpdf( flat_logpdf = np.array( [ logsumexp([la + d.logpdf(x) for la, d in zip(las, ds)], axis=0) - for las, ds in zip(logA, flat_dist) + for las, ds in zip(logw, flat_dist) ] ) flat_logpdf = np.moveaxis(flat_logpdf, 0, -1).reshape(logpdf.shape) @@ -373,20 +373,20 @@ def test_logpdf( @pytest.mark.parametrize("size", sizes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) def test_rvs_shape( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal, size + self, dim, shape, logw_shape, mean_shape, cov_shape, diagonal, size ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) + dist = self.random(dim, shape, logw_shape, mean_shape, cov_shape, diagonal) rvs = dist.rvs(size) assert rvs.shape == size + dist.shape[:-1] + (dim,) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) - def test_rvs(self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal): + def test_rvs(self, dim, shape, logw_shape, mean_shape, cov_shape, diagonal): size = 100 - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) + dist = self.random(dim, shape, logw_shape, mean_shape, cov_shape, diagonal) rvs = dist.rvs(size) - logA = np.broadcast_to(dist.logA, dist.shape).reshape(-1, dist.k).copy() - logA -= logsumexp(logA, axis=-1, keepdims=True) - p = np.exp(logA) + logw = np.broadcast_to(dist.logw, dist.shape).reshape(-1, dist.k).copy() + logw -= logsumexp(logw, axis=-1, keepdims=True) + p = np.exp(logw) mean = np.broadcast_to(dist.mean, dist.shape + (dist.dim,)).reshape( -1, dist.k, dist.dim ) @@ -422,7 +422,7 @@ def test_predict( self, dim, shape, - logA_shape, + logw_shape, mean_shape, cov_shape, diagonal, @@ -431,7 +431,7 @@ def test_predict( b_shape, k, ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) + dist = self.random(dim, shape, logw_shape, mean_shape, cov_shape, diagonal) if b_shape == "scalar": b = np.random.randn() @@ -477,10 +477,10 @@ def test_predict( @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal, p", p_tests) def test_marginalise( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal, p + self, dim, shape, logw_shape, mean_shape, cov_shape, diagonal, p ): indices = np.random.choice(dim, p, replace=False) - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) + dist = self.random(dim, shape, logw_shape, mean_shape, cov_shape, diagonal) dist_2 = dist.marginalise(indices) assert isinstance(dist_2, self.cls) @@ -490,7 +490,7 @@ def test_marginalise( == np.shape(dist.cov)[: -2 + diagonal] ) assert np.shape(dist_2.mean)[:-1] == np.shape(dist.mean)[:-1] - assert np.shape(dist_2.logA) == np.shape(dist.logA) + assert np.shape(dist_2.logw) == np.shape(dist.logw) assert dist_2.dim == dim - p @pytest.mark.parametrize("values_shape", shapes) @@ -499,7 +499,7 @@ def test_condition( self, dim, shape, - logA_shape, + logw_shape, mean_shape, cov_shape, diagonal, @@ -508,7 +508,7 @@ def test_condition( ): indices = np.random.choice(dim, p, replace=False) values = np.random.randn(*values_shape[:-1], p) - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) + dist = self.random(dim, shape, logw_shape, mean_shape, cov_shape, diagonal) dist_2 = dist.condition(indices, 
values) assert isinstance(dist_2, self.cls) @@ -525,15 +525,15 @@ def test_condition( np.shape(dist.cov)[: -2 + diagonal], values_shape[:-1] + (1,), ) - assert np.shape(dist_2.logA) == dist_2.shape + assert np.shape(dist_2.logw) == dist_2.shape assert dist_2.dim == dim - p @pytest.mark.parametrize("x_shape", shapes) @pytest.mark.parametrize("dim, shape, mean_shape, cov_shape, diagonal", tests) def test_bijector( - self, dim, shape, logA_shape, mean_shape, cov_shape, diagonal, x_shape + self, dim, shape, logw_shape, mean_shape, cov_shape, diagonal, x_shape ): - dist = self.random(dim, shape, logA_shape, mean_shape, cov_shape, diagonal) + dist = self.random(dim, shape, logw_shape, mean_shape, cov_shape, diagonal) x = np.random.rand(*x_shape[:-1], dim) y = dist.bijector(x) assert y.shape == np.broadcast_shapes(x.shape, dist.shape[:-1] + (dim,)) From 56800366c6d5be3b9e9e56a792df0a3b9a99e892 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 14 Feb 2024 19:59:31 +0000 Subject: [PATCH 072/117] Reverted Reduced models for now --- lsbi/model.py | 110 ++++++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 53 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 59381c1..73c0add 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -155,7 +155,7 @@ def model(self, θ): return self.m + np.einsum("...ja,...a->...j", self._M, θ * np.ones(self.n)) def likelihood(self, θ): - """P(D|θ as a scipy distribution object. + """P(D|θ) as a distribution object. D|θ ~ N( m + M θ, C ) θ ~ N( μ, Σ ) @@ -168,14 +168,14 @@ def likelihood(self, θ): return multivariate_normal(μ, self.C, self.shape, self.d, self.diagonal_C) def prior(self): - """P(θ) as a scipy distribution object. + """P(θ) as a distribution object. θ ~ N( μ, Σ ) """ return multivariate_normal(self.μ, self.Σ, self.shape, self.n, self.diagonal_Σ) def posterior(self, D): - """P(θ|D) as a scipy distribution object. + """P(θ|D) as a distribution object. θ|D ~ N( μ + S M'C^{-1}(D - m - M μ), S ) S = (Σ^{-1} + M'C^{-1}M)^{-1} @@ -219,7 +219,7 @@ def posterior(self, D): return multivariate_normal(μ, Σ, self.shape, self.n, diagonal_Σ) def evidence(self): - """P(D) as a scipy distribution object. + """P(D) as a distribution object. D ~ N( m + M μ, C + M Σ M' ) """ @@ -238,7 +238,7 @@ def evidence(self): return multivariate_normal(μ, Σ, self.shape, self.d, diagonal_Σ) def joint(self): - """P(θ, D) as a scipy distribution object. + """P(θ, D) as a distribution object. [θ] ~ N( [ μ ] [ Σ Σ M' ] ) [D] ( [m + M μ] , [M Σ C + M Σ M'] ) @@ -340,7 +340,7 @@ def k(self): return self.shape[-1] def likelihood(self, θ): - """P(D|θ) as a scipy distribution object. + """P(D|θ) as a distribution object. D|θ,w ~ N( m + M θ, C ) θ|w ~ N( μ, Σ ) @@ -356,7 +356,7 @@ def likelihood(self, θ): return dist def prior(self): - """P(θ) as a scipy distribution object. + """P(θ) as a distribution object. θ|A ~ N( μ, Σ ) A ~ categorical(exp(logw)) @@ -367,7 +367,7 @@ def prior(self): return dist def posterior(self, D): - """P(θ|D) as a scipy distribution object. + """P(θ|D) as a distribution object. θ|D, A ~ N( μ + S M'C^{-1}(D - m - M μ), S ) D|A ~ N( m + M μ, C + M Σ M' ) @@ -384,7 +384,7 @@ def posterior(self, D): return dist def evidence(self): - """P(D) as a scipy distribution object. + """P(D) as a distribution object. D|A ~ N( m + M μ, C + M Σ M' ) A ~ categorical(exp(logw)) @@ -395,10 +395,10 @@ def evidence(self): return dist def joint(self): - """P(D, θ) as a scipy distribution object. + """P(D, θ) as a distribution object. 
[θ] | A ~ N( [ μ ] [ Σ Σ M' ] ) - [ D ] | ( [m + M μ] , [M Σ C + M Σ M'] ) + [D] | ( [m + M μ] , [M Σ C + M Σ M'] ) A ~ categorical(exp(logw)) """ @@ -415,45 +415,47 @@ class ReducedLinearModel(object): clear/efficient to phrase it in terms of a parameter covariance, parameter mean and peak value: - logL(θ) = logLmax - (θ - μ_L)' Σ_L^{-1} (θ - μ_L) + logL(θ) = logLmax - (θ - mu_L)' Sigma_L^{-1} (θ - mu_L) We can link this to a data-based model with the relations: - Σ_L = (M' C^{-1} M)^{-1} - μ_L = Σ_L M' C^{-1} (D-m) + Sigma_L = (M' C^{-1} M)^{-1} + mu_L = Sigma_L M' C^{-1} (D-m) logLmax = - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 Parameters ---------- - μ_L : array_like + mu_L : array_like Likelihood peak - Σ_L : array_like + Sigma_L : array_like Likelihood covariance logLmax : float, optional Likelihood maximum, defaults to zero - μ_pi : array_like, optional + mmu_pi : array_like, optional Prior mean, defaults to zero vector - Σ_pi : array_like, optional + Sigma_pi : array_like, optional Prior covariance, defaults to identity matrix """ def __init__(self, *args, **kwargs): - self.μ_L = np.atleast_1d(kwargs.pop("μ_L")) - self.Σ_L = np.atleast_2d(kwargs.pop("Σ_L", None)) + self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) + self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L", None)) self.logLmax = kwargs.pop("logLmax", 0) - self.μ_pi = np.atleast_1d(kwargs.pop("μ_pi", np.zeros_like(self.μ_L))) - self.Σ_pi = np.atleast_2d(kwargs.pop("Σ_pi", np.eye(len(self.μ_pi)))) - self.Σ_P = inv(inv(self.Σ_pi) + inv(self.Σ_L)) - self.μ_P = self.Σ_P @ (solve(self.Σ_pi, self.μ_pi) + solve(self.Σ_L, self.μ_L)) + self.mu_pi = np.atleast_1d(kwargs.pop("mu_pi", np.zeros_like(self.mu_L))) + self.Sigma_pi = np.atleast_2d(kwargs.pop("Sigma_pi", np.eye(len(self.mu_pi)))) + self.Sigma_P = inv(inv(self.Sigma_pi) + inv(self.Sigma_L)) + self.mu_P = self.Sigma_P @ ( + solve(self.Sigma_pi, self.mu_pi) + solve(self.Sigma_L, self.mu_L) + ) def prior(self): - """P(θ) as a scipy distribution object.""" - return multivariate_normal(self.μ_pi, self.Σ_pi) + """P(θ) as a distribution object.""" + return multivariate_normal(self.mu_pi, self.Sigma_pi) def posterior(self): - """P(θ|D) as a scipy distribution object.""" - return multivariate_normal(self.μ_P, self.Σ_P) + """P(θ|D) as a distribution object.""" + return multivariate_normal(self.mu_P, self.Sigma_P) def logpi(self, θ): """P(θ) as a scalar.""" @@ -467,27 +469,29 @@ def logL(self, θ): """P(D|θ) as a scalar.""" return ( self.logLmax - + multivariate_normal(self.μ_L, self.Σ_L).logpdf(θ) - + logdet(2 * np.pi * self.Σ_L) / 2 + + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(θ) + + logdet(2 * np.pi * self.Sigma_L) / 2 ) def logZ(self): """P(D) as a scalar.""" return ( self.logLmax - + logdet(self.Σ_P) / 2 - - logdet(self.Σ_pi) / 2 - - (self.μ_P - self.μ_pi) @ solve(self.Σ_pi, self.μ_P - self.μ_pi) / 2 - - (self.μ_P - self.μ_L) @ solve(self.Σ_L, self.μ_P - self.μ_L) / 2 + + logdet(self.Sigma_P) / 2 + - logdet(self.Sigma_pi) / 2 + - (self.mu_P - self.mu_pi) + @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) + / 2 + - (self.mu_P - self.mu_L) @ solve(self.Sigma_L, self.mu_P - self.mu_L) / 2 ) def DKL(self): """D_KL(P(θ|D)||P(θ)) the Kullback-Leibler divergence.""" return ( - logdet(self.Σ_pi) - - logdet(self.Σ_P) - + np.trace(inv(self.Σ_pi) @ self.Σ_P - 1) - + (self.μ_P - self.μ_pi) @ solve(self.Σ_pi, self.μ_P - self.μ_pi) + logdet(self.Sigma_pi) + - logdet(self.Sigma_P) + + np.trace(inv(self.Sigma_pi) @ self.Sigma_P - 1) + + (self.mu_P - self.mu_pi) 
@ solve(self.Sigma_pi, self.mu_P - self.mu_pi) ) / 2 @@ -496,22 +500,22 @@ class ReducedLinearModelUniformPrior(object): Gaussian likelihood in the parameters - logL(θ) = logLmax - (θ - μ_L)' Σ_L^{-1} (θ - μ_L) + logL(θ) = logLmax - (θ - mu_L)' Sigma_L^{-1} (θ - mu_L) Uniform prior We can link this to a data-based model with the relations: - Σ_L = (M' C^{-1} M)^{-1} - μ_L = Σ_L M' C^{-1} (D-m) + Sigma_L = (M' C^{-1} M)^{-1} + mu_L = Sigma_L M' C^{-1} (D-m) logLmax = -log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 Parameters ---------- - μ_L : array_like + mu_L : array_like Likelihood peak - Σ_L : array_like + Sigma_L : array_like Likelihood covariance logLmax : float, optional Likelihood maximum, defaults to zero @@ -520,16 +524,16 @@ class ReducedLinearModelUniformPrior(object): """ def __init__(self, *args, **kwargs): - self.μ_L = np.atleast_1d(kwargs.pop("μ_L")) - self.Σ_L = np.atleast_2d(kwargs.pop("Σ_L")) + self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) + self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L")) self.logLmax = kwargs.pop("logLmax", 0) self.logV = kwargs.pop("logV", 0) - self.Σ_P = self.Σ_L - self.μ_P = self.μ_L + self.Sigma_P = self.Sigma_L + self.mu_P = self.mu_L def posterior(self): - """P(θ|D) as a scipy distribution object.""" - return multivariate_normal(self.μ_P, self.Σ_P) + """P(θ|D) as a distribution object.""" + return multivariate_normal(self.mu_P, self.Sigma_P) def logpi(self, θ): """P(θ) as a scalar.""" @@ -543,14 +547,14 @@ def logL(self, θ): """P(D|θ) as a scalar.""" return ( self.logLmax - + logdet(2 * np.pi * self.Σ_L) / 2 - + multivariate_normal(self.μ_L, self.Σ_L).logpdf(θ) + + logdet(2 * np.pi * self.Sigma_L) / 2 + + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(θ) ) def logZ(self): """P(D) as a scalar.""" - return self.logLmax + logdet(2 * np.pi * self.Σ_P) / 2 - self.logV + return self.logLmax + logdet(2 * np.pi * self.Sigma_P) / 2 - self.logV def DKL(self): """D_KL(P(θ|D)||P(θ)) the Kullback-Leibler divergence.""" - return self.logV - logdet(2 * np.pi * np.e * self.Σ_P) / 2 + return self.logV - logdet(2 * np.pi * np.e * self.Sigma_P) / 2 From e39390823b7bf80dfce3950fe504a24fcbfe7c5c Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 15 Feb 2024 12:40:52 +0000 Subject: [PATCH 073/117] Improved alias functionality --- lsbi/model.py | 57 ++++++++++++++++----------------------------------- lsbi/utils.py | 40 ++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 39 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 73c0add..6dca16d 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -4,7 +4,7 @@ from numpy.linalg import inv, solve from lsbi.stats import mixture_normal, multivariate_normal -from lsbi.utils import dediagonalise, logdet +from lsbi.utils import alias, dediagonalise, logdet class LinearModel(object): @@ -83,30 +83,6 @@ def __init__(self, *args, **kwargs): if kwargs: raise ValueError(f"Unrecognised arguments: {kwargs}") - @property - def Sigma(self): # noqa: D102 - return self.Σ - - @Sigma.setter - def Sigma(self, value): - self.Σ = value - - @property - def mu(self): # noqa: D102 - return self.μ - - @mu.setter - def mu(self, value): - self.μ = value - - @property - def diagonal_Sigma(self): # noqa: D102 - return self.diagonal_Σ - - @diagonal_Sigma.setter - def diagonal_Sigma(self, value): - self.diagonal_Σ = value - @property def shape(self): """Shape of the distribution.""" @@ -271,12 +247,17 @@ def _Σ(self): return dediagonalise(self.Σ, self.diagonal_Σ, self.n) +alias(LinearModel, 
"μ", "mu") +alias(LinearModel, "Σ", "Sigma") +alias(LinearModel, "diagonal_Σ", "diagonal_Sigma") + + class MixtureModel(LinearModel): """A linear mixture model. - D|θ, A ~ N( m + M θ, C ) - θ|A ~ N( μ, Σ ) - A ~ categorical( exp(logw) ) + D|θ, w ~ N( m + M θ, C ) + θ|w ~ N( μ, Σ ) + w ~ categorical( exp(logw) ) Defined by: Parameters: θ (..., n,) @@ -343,8 +324,7 @@ def likelihood(self, θ): """P(D|θ) as a distribution object. D|θ,w ~ N( m + M θ, C ) - θ|w ~ N( μ, Σ ) - w ~ categorical(exp(logw)) + w|θ ~ categorical(...) Parameters ---------- @@ -358,8 +338,8 @@ def likelihood(self, θ): def prior(self): """P(θ) as a distribution object. - θ|A ~ N( μ, Σ ) - A ~ categorical(exp(logw)) + θ|w ~ N( μ, Σ ) + w ~ categorical(exp(logw)) """ dist = super().prior() dist.__class__ = mixture_normal @@ -369,9 +349,8 @@ def prior(self): def posterior(self, D): """P(θ|D) as a distribution object. - θ|D, A ~ N( μ + S M'C^{-1}(D - m - M μ), S ) - D|A ~ N( m + M μ, C + M Σ M' ) - A ~ categorical(exp(logw)) + θ|D, w ~ N( μ + S M'C^{-1}(D - m - M μ), S ) + w|D ~ categorical(...) S = (Σ^{-1} + M'C^{-1}M)^{-1} Parameters @@ -386,8 +365,8 @@ def posterior(self, D): def evidence(self): """P(D) as a distribution object. - D|A ~ N( m + M μ, C + M Σ M' ) - A ~ categorical(exp(logw)) + D|w ~ N( m + M μ, C + M Σ M' ) + w ~ categorical(exp(logw)) """ dist = super().evidence() dist.__class__ = mixture_normal @@ -397,10 +376,10 @@ def evidence(self): def joint(self): """P(D, θ) as a distribution object. - [θ] | A ~ N( [ μ ] [ Σ Σ M' ] ) + [θ] | w ~ N( [ μ ] [ Σ Σ M' ] ) [D] | ( [m + M μ] , [M Σ C + M Σ M'] ) - A ~ categorical(exp(logw)) + w ~ categorical(exp(logw)) """ dist = super().joint() dist.__class__ = mixture_normal diff --git a/lsbi/utils.py b/lsbi/utils.py index 24082e8..b9f0f25 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -64,3 +64,43 @@ def dediagonalise(x, diagonal, *args): return np.atleast_1d(x)[..., None, :] * np.eye(*args) else: return x + + +def alias(cls, name, alias): + """Create an alias for a property. + + Parameters + ---------- + cls : class + Class to add the alias to. + name : str + Name of the property to alias. + alias : str + Name of the alias. + + Examples + -------- + >>> class MyCls: + ... def __init__(self, name): + ... self.name = name + ... 
+ >>> alias(MyCls, 'name', 'n') + >>> obj = MyCls('will') + >>> obj.name + 'will' + >>> obj.n + 'will' + >>> obj.n = 'bill' + >>> obj.name + 'bill' + """ + + @property + def f(self): + return getattr(self, name) + + @f.setter + def f(self, x): + setattr(self, name, x) + + setattr(cls, alias, f) From d6c3996769e76499ef24769a326aa291b49461d7 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 15 Feb 2024 16:22:40 +0000 Subject: [PATCH 074/117] Added a test for alias and model kwargs --- tests/test_model.py | 4 ++++ tests/test_utils.py | 24 +++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/test_model.py b/tests/test_model.py index 7807820..4c37a98 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -57,6 +57,10 @@ def assert_allclose_broadcast(a, b, *args, **kwargs): tests.append(test) +def test_linear_model_init(object): + with pytest.raises(ValueError): + LinearModel(foo='bar') + @pytest.mark.parametrize( "d,n,shape,m_shape,M_shape,mu_shape,C_shape,Sigma_shape,diagonal_Sigma,diagonal_C,diagonal_M", tests, diff --git a/tests/test_utils.py b/tests/test_utils.py index 3123965..4c7a066 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,8 @@ import pytest from numpy.testing import assert_allclose -from lsbi.utils import bisect +from lsbi.utils import bisect, alias +import numpy as np def test_bisect(): @@ -17,3 +18,24 @@ def f(x): return x - [1, 2] assert_allclose(bisect(f, 0, 10), [1, 2]) + + +def test_alias(): + class A: + pass + + a = A() + with pytest.raises(AttributeError): + a.x + a.x = 1 + assert a.x == 1 + with pytest.raises(AttributeError): + a.y + alias(A, "x", "y") + assert a.y == 1 + a.y = 2 + assert a.x == 2 + a.x = np.eye(3) + assert_allclose(a.y, np.eye(3)) + a.y[0, 0] = 0 + assert a.x[0, 0] == 0 From 0bb90092ef0d567cff160448cf82680b11b290d3 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 15 Feb 2024 17:21:47 +0000 Subject: [PATCH 075/117] Corrected black --- tests/test_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_model.py b/tests/test_model.py index 4c37a98..e428ee5 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -59,7 +59,8 @@ def assert_allclose_broadcast(a, b, *args, **kwargs): def test_linear_model_init(object): with pytest.raises(ValueError): - LinearModel(foo='bar') + LinearModel(foo="bar") + @pytest.mark.parametrize( "d,n,shape,m_shape,M_shape,mu_shape,C_shape,Sigma_shape,diagonal_Sigma,diagonal_C,diagonal_M", From 448cb05bfc698f59169f611403ea8cbb9bb2f563 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 15 Feb 2024 17:23:07 +0000 Subject: [PATCH 076/117] Corrected isort --- tests/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4c7a066..f2195e7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,8 +1,8 @@ +import numpy as np import pytest from numpy.testing import assert_allclose -from lsbi.utils import bisect, alias -import numpy as np +from lsbi.utils import alias, bisect def test_bisect(): From 046174f78fe3160c987960e55f475084920f47cd Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 15 Feb 2024 18:08:54 +0000 Subject: [PATCH 077/117] Corrected test_linear_model_init --- tests/test_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_model.py b/tests/test_model.py index e428ee5..192d9e9 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -57,7 +57,7 @@ def assert_allclose_broadcast(a, 
b, *args, **kwargs): tests.append(test) -def test_linear_model_init(object): +def test_linear_model_init(): with pytest.raises(ValueError): LinearModel(foo="bar") From 03f4e05c258d51417919f2fb06827361e2c0a821 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 16 Feb 2024 10:00:56 +0000 Subject: [PATCH 078/117] Corrections of conversions --- lsbi/model.py | 70 ++++++++++++++++++++++++++++++++++++++++----------- lsbi/stats.py | 14 ++++++----- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 6dca16d..9962eb2 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -1,5 +1,7 @@ """Gaussian models for linear Bayesian inference.""" +import copy + import numpy as np from numpy.linalg import inv, solve @@ -59,18 +61,18 @@ class LinearModel(object): """ def __init__(self, *args, **kwargs): - self.M = kwargs.pop("M", 1.0) + self.M = kwargs.pop("M", 1) self.diagonal_M = kwargs.pop("diagonal_M", False) if len(np.shape(self.M)) < 2: self.diagonal_M = True - self.m = kwargs.pop("m", 0.0) - self.C = kwargs.pop("C", 1.0) + self.m = kwargs.pop("m", 0) + self.C = kwargs.pop("C", 1) self.diagonal_C = kwargs.pop("diagonal_C", False) if len(np.shape(self.C)) < 2: self.diagonal_C = True - self.μ = kwargs.pop("μ", 0.0) + self.μ = kwargs.pop("μ", 0) self.μ = kwargs.pop("mu", self.μ) - self.Σ = kwargs.pop("Σ", 1.0) + self.Σ = kwargs.pop("Σ", 1) self.Σ = kwargs.pop("Sigma", self.Σ) self.diagonal_Σ = kwargs.pop("diagonal_Σ", False) self.diagonal_Σ = kwargs.pop("diagonal_Sigma", self.diagonal_Σ) @@ -169,10 +171,10 @@ def posterior(self, D): shape = np.broadcast_shapes(self.shape, values.shape[:-1]) C = np.atleast_1d(self.C)[..., :dim] M = np.atleast_1d(self.M)[..., :dim] - Σ = np.broadcast_to(self.Σ, shape + (self.n,)).copy() + Σ = self.Σ * np.ones((*shape, self.n)) Σ[..., :dim] = 1 / (1 / Σ[..., :dim] + M**2 / C) - μ = np.broadcast_to(self.μ, shape + (self.n,)).copy() + μ = self.μ * np.ones((*shape, self.n)) μ[..., :dim] = μ[..., :dim] + Σ[..., :dim] * M / C * values[..., :dim] else: if self.diagonal_C: @@ -204,8 +206,13 @@ def evidence(self): dim = min(self.n, self.d) M = np.atleast_1d(self.M)[..., :dim] S = np.atleast_1d(self.Σ)[..., :dim] - Σ = np.broadcast_to(self.C, self.shape + (self.d,)).copy() - Σ[..., :dim] += S * M**2 + Σ = self.C * np.ones( + ( + *self.shape, + self.d, + ) + ) + Σ[..., :dim] = Σ[..., :dim] + S * M**2 else: Σ = self._C + np.einsum( "...ja,...ab,...kb->...jk", self._M, self._Σ, self._M @@ -234,6 +241,24 @@ def joint(self): Σ = np.block([[A, B], [C, D]]) return multivariate_normal(μ, Σ, self.shape, self.n + self.d) + def update(self, D, inplace=False): + """Bayesian update of the model with data. 
+ + Parameters + ---------- + D : array_like, shape (..., d) + """ + dist = copy.deepcopy(self) if not inplace else self + posterior = self.posterior(D) + dist.μ = posterior.mean + dist.Σ = posterior.cov + if not inplace: + return dist + + def PPD(self, D0): + """P(D|D0) as a distribution object.""" + return self.update(D0).evidence() + @property def _M(self): return dediagonalise(self.M, self.diagonal_M, self.d, self.n) @@ -257,7 +282,7 @@ class MixtureModel(LinearModel): D|θ, w ~ N( m + M θ, C ) θ|w ~ N( μ, Σ ) - w ~ categorical( exp(logw) ) + w ~ categorical( exp(logw) ) Defined by: Parameters: θ (..., n,) @@ -305,7 +330,7 @@ class MixtureModel(LinearModel): """ def __init__(self, *args, **kwargs): - self.logw = kwargs.pop("logw", 0.0) + self.logw = kwargs.pop("logw", 0) super().__init__(*args, **kwargs) @property @@ -332,7 +357,8 @@ def likelihood(self, θ): """ dist = super().likelihood(np.expand_dims(θ, -2)) dist.__class__ = mixture_normal - dist.logw = self.prior().logpdf(θ, broadcast=True, joint=True) + prior = self.prior() + dist.logw = prior.logpdf(θ, broadcast=True, joint=True) - prior.logpdf(θ) return dist def prior(self): @@ -350,7 +376,7 @@ def posterior(self, D): """P(θ|D) as a distribution object. θ|D, w ~ N( μ + S M'C^{-1}(D - m - M μ), S ) - w|D ~ categorical(...) + w|D ~ P(D|w)P(w)/P(D) S = (Σ^{-1} + M'C^{-1}M)^{-1} Parameters @@ -359,7 +385,8 @@ def posterior(self, D): """ dist = super().posterior(np.expand_dims(D, -2)) dist.__class__ = mixture_normal - dist.logw = self.evidence().logpdf(D, broadcast=True, joint=True) + evidence = self.evidence() + dist.logw = evidence.logpdf(D, broadcast=True, joint=True) - evidence.logpdf(D) return dist def evidence(self): @@ -386,6 +413,21 @@ def joint(self): dist.logw = self.logw return dist + def update(self, D, inplace=False): + """Bayesian update of the model with data. + + Parameters + ---------- + D : array_like, shape (..., d) + """ + dist = copy.deepcopy(self) if not inplace else self + posterior = self.posterior(D) + dist.μ = posterior.mean + dist.Σ = posterior.cov + dist.logw = posterior.logw + if not inplace: + return dist + class ReducedLinearModel(object): """A model with no data. diff --git a/lsbi/stats.py b/lsbi/stats.py index 96a283a..7531b37 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -41,7 +41,7 @@ class multivariate_normal(object): If True, cov is interpreted as the diagonal of the covariance matrix. """ - def __init__(self, mean=0.0, cov=1.0, shape=(), dim=1, diagonal=False): + def __init__(self, mean=0, cov=1, shape=(), dim=1, diagonal=False): self.mean = mean self.cov = cov self._shape = shape @@ -142,7 +142,7 @@ def rvs(self, size=()): else: return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) - def predict(self, A=1.0, b=0.0, diagonal=False): + def predict(self, A=1, b=0, diagonal=False): """Predict the mean and covariance of a linear transformation. if: x ~ N(μ, Σ) @@ -366,7 +366,7 @@ class mixture_normal(multivariate_normal): If True, cov is interpreted as the diagonal of the covariance matrix. 
""" - def __init__(self, logw=0.0, mean=0.0, cov=1.0, shape=(), dim=1, diagonal=False): + def __init__(self, logw=0, mean=0, cov=1, shape=(), dim=1, diagonal=False): self.logw = logw super().__init__(mean, cov, shape, dim, diagonal) @@ -412,7 +412,7 @@ def logpdf(self, x, broadcast=False, joint=False): if self.shape == (): return logpdf logw = np.broadcast_to(self.logw, self.shape).copy() - logw -= logsumexp(logw, axis=-1, keepdims=True) + logw = logw - logsumexp(logw, axis=-1, keepdims=True) if joint: return logpdf + logw return logsumexp(logpdf + logw, axis=-1) @@ -457,7 +457,7 @@ def rvs(self, size=()): return super().rvs(size=size) size = np.atleast_1d(np.array(size, dtype=int)) logw = np.broadcast_to(self.logw, self.shape).copy() - logw -= logsumexp(logw, axis=-1, keepdims=True) + logw = logw - logsumexp(logw, axis=-1, keepdims=True) p = np.exp(logw) cump = np.cumsum(p, axis=-1) u = np.random.rand(np.prod(size), *p.shape[:-1]) @@ -496,7 +496,9 @@ def condition(self, indices, values): dist = super().condition(indices, np.expand_dims(values, -2)) dist.__class__ = mixture_normal marg = self.marginalise(self._bar(indices)) - dist.logw = marg.logpdf(values, broadcast=True, joint=True) + dist.logw = marg.logpdf(values, broadcast=True, joint=True) - marg.logpdf( + values, broadcast=True + ) return dist def bijector(self, x, inverse=False): From 1bb3d6b9152875e2003f81c1066c1047af190026 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 16 Feb 2024 12:27:43 +0000 Subject: [PATCH 079/117] Fixed normalisation --- lsbi/model.py | 7 +++++-- lsbi/stats.py | 5 ++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 9962eb2..eeb4de2 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -4,6 +4,7 @@ import numpy as np from numpy.linalg import inv, solve +from scipy.special import logsumexp from lsbi.stats import mixture_normal, multivariate_normal from lsbi.utils import alias, dediagonalise, logdet @@ -358,7 +359,8 @@ def likelihood(self, θ): dist = super().likelihood(np.expand_dims(θ, -2)) dist.__class__ = mixture_normal prior = self.prior() - dist.logw = prior.logpdf(θ, broadcast=True, joint=True) - prior.logpdf(θ) + dist.logw = prior.logpdf(θ, broadcast=True, joint=True) + dist.logw = dist.logw - logsumexp(dist.logw, axis=-1, keepdims=True) return dist def prior(self): @@ -386,7 +388,8 @@ def posterior(self, D): dist = super().posterior(np.expand_dims(D, -2)) dist.__class__ = mixture_normal evidence = self.evidence() - dist.logw = evidence.logpdf(D, broadcast=True, joint=True) - evidence.logpdf(D) + dist.logw = evidence.logpdf(D, broadcast=True, joint=True) + dist.logw = dist.logw - logsumexp(dist.logw, axis=-1, keepdims=True) return dist def evidence(self): diff --git a/lsbi/stats.py b/lsbi/stats.py index 7531b37..d775e21 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -496,9 +496,8 @@ def condition(self, indices, values): dist = super().condition(indices, np.expand_dims(values, -2)) dist.__class__ = mixture_normal marg = self.marginalise(self._bar(indices)) - dist.logw = marg.logpdf(values, broadcast=True, joint=True) - marg.logpdf( - values, broadcast=True - ) + dist.logw = marg.logpdf(values, broadcast=True, joint=True) + dist.logw = dist.logw - logsumexp(dist.logw, axis=-1, keepdims=True) return dist def bijector(self, x, inverse=False): From 19dd841633f33ad4174cd77850c52185dc715fb2 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 16 Feb 2024 15:06:43 +0000 Subject: [PATCH 080/117] Added tests to cover ppd and update --- lsbi/model.py | 2 
+- tests/test_model.py | 94 ++++++++++++++++++++++++++++++--------------- 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index eeb4de2..8a6125f 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -256,7 +256,7 @@ def update(self, D, inplace=False): if not inplace: return dist - def PPD(self, D0): + def ppd(self, D0): """P(D|D0) as a distribution object.""" return self.update(D0).evidence() diff --git a/tests/test_model.py b/tests/test_model.py index 192d9e9..11c74c8 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -175,9 +175,9 @@ def test_likelihood( d, ) theta = np.random.randn(*theta_shape, n) - dist = model.likelihood(theta) - assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) - assert dist.dim == model.d + likelihood = model.likelihood(theta) + assert likelihood.shape == np.broadcast_shapes(model.shape, theta_shape) + assert likelihood.dim == model.d def test_prior( self, @@ -206,9 +206,9 @@ def test_prior( n, d, ) - dist = model.prior() - assert dist.shape == model.shape - assert dist.dim == model.n + prior = model.prior() + assert prior.shape == model.shape + assert prior.dim == model.n @pytest.mark.parametrize("D_shape", shapes) def test_posterior( @@ -240,9 +240,26 @@ def test_posterior( d, ) D = np.random.randn(*D_shape, d) - dist = model.posterior(D) - assert dist.shape == np.broadcast_shapes(model.shape, D_shape) - assert dist.dim == model.n + posterior = model.posterior(D) + assert posterior.shape == np.broadcast_shapes(model.shape, D_shape) + assert posterior.dim == model.n + + ppd = model.ppd(D) + assert ppd.shape == np.broadcast_shapes(model.shape, D_shape) + + update = model.update(D) + assert_allclose_broadcast(update.m, model.m) + assert_allclose_broadcast(update.M, model.M) + assert_allclose_broadcast(update.C, model.C) + assert_allclose_broadcast(update.mu, posterior.mean) + assert_allclose_broadcast(update.Sigma, posterior.cov) + + model.update(D, inplace=True) + assert_allclose(model.m, update.m) + assert_allclose(model.M, update.M) + assert_allclose(model.C, update.C) + assert_allclose(model.mu, update.mu) + assert_allclose(model.Sigma, update.Sigma) def test_evidence( self, @@ -271,9 +288,9 @@ def test_evidence( n, d, ) - dist = model.evidence() - assert dist.shape == model.shape - assert dist.dim == model.d + evidence = model.evidence() + assert evidence.shape == model.shape + assert evidence.dim == model.d def test_joint( self, @@ -302,9 +319,9 @@ def test_joint( n, d, ) - dist = model.joint() - assert dist.shape == model.shape - assert dist.dim == model.n + model.d + joint = model.joint() + assert joint.shape == model.shape + assert joint.dim == model.n + model.d def test_marginal_conditional( self, @@ -500,10 +517,10 @@ def test_likelihood( d, ) theta = np.random.randn(*theta_shape[:-1], n) - dist = model.likelihood(theta) + likelihood = model.likelihood(theta) if model.shape != (): - assert dist.shape == np.broadcast_shapes(model.shape, theta_shape) - assert dist.dim == model.d + assert likelihood.shape == np.broadcast_shapes(model.shape, theta_shape) + assert likelihood.dim == model.d def test_prior( self, @@ -534,9 +551,9 @@ def test_prior( n, d, ) - dist = model.prior() - assert dist.shape == model.shape - assert dist.dim == model.n + prior = model.prior() + assert prior.shape == model.shape + assert prior.dim == model.n @pytest.mark.parametrize("D_shape", shapes) def test_posterior( @@ -570,10 +587,27 @@ def test_posterior( d, ) D = np.random.randn(*D_shape[:-1], d) - dist = 
model.posterior(D) + posterior = model.posterior(D) if model.shape != (): - assert dist.shape == np.broadcast_shapes(model.shape, D_shape) - assert dist.dim == model.n + assert posterior.shape == np.broadcast_shapes(model.shape, D_shape) + assert posterior.dim == model.n + + ppd = model.ppd(D) + assert ppd.shape == np.broadcast_shapes(model.shape, D_shape) + + update = model.update(D) + assert_allclose_broadcast(update.m, model.m) + assert_allclose_broadcast(update.M, model.M) + assert_allclose_broadcast(update.C, model.C) + assert_allclose_broadcast(update.mu, posterior.mean) + assert_allclose_broadcast(update.Sigma, posterior.cov) + + model.update(D, inplace=True) + assert_allclose(model.m, update.m) + assert_allclose(model.M, update.M) + assert_allclose(model.C, update.C) + assert_allclose(model.mu, update.mu) + assert_allclose(model.Sigma, update.Sigma) def test_evidence( self, @@ -604,9 +638,9 @@ def test_evidence( n, d, ) - dist = model.evidence() - assert dist.shape == model.shape - assert dist.dim == model.d + evidence = model.evidence() + assert evidence.shape == model.shape + assert evidence.dim == model.d def test_joint( self, @@ -637,9 +671,9 @@ def test_joint( n, d, ) - dist = model.joint() - assert dist.shape == model.shape - assert dist.dim == model.n + model.d + joint = model.joint() + assert joint.shape == model.shape + assert joint.dim == model.n + model.d def test_marginal_conditional( self, From 2aaeaf85482a15e004513d6844c24cd6b050a151 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 16 Feb 2024 15:15:38 +0000 Subject: [PATCH 081/117] Now passing on diagonal --- lsbi/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lsbi/model.py b/lsbi/model.py index 8a6125f..012b553 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -253,6 +253,7 @@ def update(self, D, inplace=False): posterior = self.posterior(D) dist.μ = posterior.mean dist.Σ = posterior.cov + dist.diagonal_Σ = posterior.diagonal if not inplace: return dist @@ -427,6 +428,7 @@ def update(self, D, inplace=False): posterior = self.posterior(D) dist.μ = posterior.mean dist.Σ = posterior.cov + dist.diagonal_Σ = posterior.diagonal dist.logw = posterior.logw if not inplace: return dist From 6d38b67106a049dc7cd19493afb636bc501ecab6 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 16 Feb 2024 15:35:31 +0000 Subject: [PATCH 082/117] Now covering ppd --- tests/test_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_model.py b/tests/test_model.py index 11c74c8..cef0fd6 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -572,6 +572,7 @@ def test_posterior( n, d, ): + model = self.random( logw_shape, M_shape, @@ -593,7 +594,8 @@ def test_posterior( assert posterior.dim == model.n ppd = model.ppd(D) - assert ppd.shape == np.broadcast_shapes(model.shape, D_shape) + if model.shape != (): + assert ppd.shape == np.broadcast_shapes(model.shape, D_shape) update = model.update(D) assert_allclose_broadcast(update.m, model.m) From 9a26f3701bfdf616ca62222657c5162399123dd2 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 18 Feb 2024 14:17:50 +0000 Subject: [PATCH 083/117] Added a KL divergence --- lsbi/model.py | 29 +++++++++++++++++++++++++++- lsbi/stats.py | 47 +++++++++++++++++++++++++++++++++++++++++++-- lsbi/utils.py | 7 +++++-- tests/test_model.py | 12 +++++++++++- tests/test_stats.py | 37 ++++++++++++++++++++++++++++++++++- 5 files changed, 125 insertions(+), 7 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 012b553..be94640 100644 --- 
a/lsbi/model.py +++ b/lsbi/model.py @@ -6,7 +6,7 @@ from numpy.linalg import inv, solve from scipy.special import logsumexp -from lsbi.stats import mixture_normal, multivariate_normal +from lsbi.stats import dkl, mixture_normal, multivariate_normal from lsbi.utils import alias, dediagonalise, logdet @@ -261,6 +261,19 @@ def ppd(self, D0): """P(D|D0) as a distribution object.""" return self.update(D0).evidence() + def dkl(self, D, n=0): + """KL divergence between the posterior and prior. + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the posterior + n : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + """ + + return dkl(self.posterior(D), self.prior(), n) + @property def _M(self): return dediagonalise(self.M, self.diagonal_M, self.d, self.n) @@ -433,6 +446,20 @@ def update(self, D, inplace=False): if not inplace: return dist + def dkl(self, D, n=0): + """KL divergence between the posterior and prior. + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the posterior + n : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + """ + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + return super().dkl(D, n) + class ReducedLinearModel(object): """A model with no data. diff --git a/lsbi/stats.py b/lsbi/stats.py index d775e21..fcaa7d4 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -122,7 +122,7 @@ def pdf(self, x, broadcast=False): """ return np.exp(self.logpdf(x, broadcast=broadcast)) - def rvs(self, size=()): + def rvs(self, size=(), broadcast=False): """Draw random samples from the distribution. Parameters @@ -136,7 +136,10 @@ def rvs(self, size=()): Random samples from the distribution. """ size = np.atleast_1d(size) - x = np.random.randn(*size, *self.shape, self.dim) + if broadcast: + x = np.random.randn(*size, self.dim) + else: + x = np.random.randn(*size, *self.shape, self.dim) if self.diagonal: return self.mean + np.sqrt(self.cov) * x else: @@ -566,3 +569,43 @@ def __getitem__(self, arg): # noqa: D105 dist.__class__ = mixture_normal dist.logw = np.broadcast_to(self.logw, self.shape)[arg] return dist + + +def dkl(p, q, n=0): + """Kullback-Leibler divergence between two distributions. + + Parameters + ---------- + p : lsbi.stats.multivariate_normal + q : lsbi.stats.multivariate_normal + n : int, optional, default=0 + Number of samples to mcmc estimate the divergence. + + Returns + ------- + dkl : array_like + Kullback-Leibler divergence between p and q. 
+ """ + shape = np.broadcast_shapes(p.shape, q.shape) + if n: + x = p.rvs(size=(n, *shape), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) + dkl = -p.dim * np.ones(shape) + dkl = dkl + logdet(q.cov * np.ones(q.dim), q.diagonal) + dkl = dkl - logdet(p.cov * np.ones(p.dim), p.diagonal) + pq = (p.mean - q.mean) * np.ones(p.dim) + if q.diagonal: + dkl = dkl + (pq**2 / q.cov).sum(axis=-1) + if p.diagonal: + dkl = dkl + (p.cov / q.cov * np.ones(q.dim)).sum(axis=-1) + else: + dkl = dkl + (np.diagonal(p.cov, 0, -2, -1) / q.cov).sum(axis=-1) + else: + invq = inv(q.cov) + dkl = dkl + np.einsum("...i,...ij,...j->...", pq, invq, pq) + if p.diagonal: + dkl = dkl + (p.cov * np.diagonal(invq, 0, -2, -1)).sum(axis=-1) + else: + dkl = dkl + np.einsum("...ij,...ji->...", invq, p.cov) + + return dkl / 2 diff --git a/lsbi/utils.py b/lsbi/utils.py index b9f0f25..e5a7a03 100644 --- a/lsbi/utils.py +++ b/lsbi/utils.py @@ -3,9 +3,12 @@ import numpy as np -def logdet(A): +def logdet(A, diagonal=False): """log(abs(det(A))).""" - return np.linalg.slogdet(A)[1] + if diagonal: + return np.log(np.abs(A)).sum(axis=-1) + else: + return np.linalg.slogdet(A)[1] def quantise(f, x, tol=1e-8): diff --git a/tests/test_model.py b/tests/test_model.py index cef0fd6..16fc9f1 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -261,6 +261,10 @@ def test_posterior( assert_allclose(model.mu, update.mu) assert_allclose(model.Sigma, update.Sigma) + dkl = model.dkl(D) + assert dkl.shape == model.shape + assert (dkl >= 0).all() + def test_evidence( self, M_shape, @@ -572,7 +576,6 @@ def test_posterior( n, d, ): - model = self.random( logw_shape, M_shape, @@ -611,6 +614,13 @@ def test_posterior( assert_allclose(model.mu, update.mu) assert_allclose(model.Sigma, update.Sigma) + with pytest.raises(ValueError): + model.dkl(D) + + dkl = model.dkl(D, 10) + assert dkl.shape == model.shape + assert (dkl >= 0).all() + def test_evidence( self, logw_shape, diff --git a/tests/test_stats.py b/tests/test_stats.py index 0a99ba3..6f957a4 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -5,7 +5,7 @@ from scipy.special import logsumexp from scipy.stats import multivariate_normal as scipy_multivariate_normal -from lsbi.stats import mixture_normal, multivariate_normal +from lsbi.stats import dkl, mixture_normal, multivariate_normal shapes = [(2, 3), (3,), ()] sizes = [(6, 5), (5,), ()] @@ -547,3 +547,38 @@ def test_bijector( assert_allclose( np.broadcast_to(x, y.shape), dist.bijector(y, inverse=True), atol=1e-4 ) + + +@pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) +@pytest.mark.parametrize("dim_q, shape_q, mean_shape_q, cov_shape_q, diagonal_q", tests) +def test_dkl( + dim_p, + shape_p, + mean_shape_p, + cov_shape_p, + diagonal_p, + dim_q, + shape_q, + mean_shape_q, + cov_shape_q, + diagonal_q, +): + p = TestMultivariateNormal().random( + dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p + ) + q = TestMultivariateNormal().random( + dim, shape_q, mean_shape_q, cov_shape_q, diagonal_q + ) + + dkl_pq = dkl(p, q) + + assert_allclose(dkl(p, p), 0, atol=1e-10) + assert_allclose(dkl(q, q), 0, atol=1e-10) + + assert (dkl_pq >= 0).all() + assert dkl_pq.shape == np.broadcast_shapes(p.shape, q.shape) + + dkl_mc = dkl(p, q, 1000) + assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) + + assert_allclose(dkl_pq, dkl_mc, atol=1) From 91ff25f734522411400311ec8566704d02b61dbc Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 18 Feb 2024 14:26:06 
+0000 Subject: [PATCH 084/117] Implemented broadcasting for mixture models --- lsbi/stats.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index fcaa7d4..3a414e0 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -129,6 +129,8 @@ def rvs(self, size=(), broadcast=False): ---------- size : int or tuple of ints, optional, default=() Number of samples to draw. + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. Returns ------- @@ -445,19 +447,22 @@ def pdf(self, x, broadcast=False, joint=False): """ return np.exp(self.logpdf(x, broadcast=broadcast, joint=joint)) - def rvs(self, size=()): + def rvs(self, size=(), broadcast=False): """Draw random samples from the distribution. Parameters ---------- size : int or tuple of ints, optional, default=1 + Number of samples to draw. + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. Returns ------- rvs : array_like, shape `(*size, *shape[:-1], dim)` """ if self.shape == (): - return super().rvs(size=size) + return super().rvs(size=size, broadcast=broadcast) size = np.atleast_1d(np.array(size, dtype=int)) logw = np.broadcast_to(self.logw, self.shape).copy() logw = logw - logsumexp(logw, axis=-1, keepdims=True) @@ -467,7 +472,11 @@ def rvs(self, size=()): i = np.argmax(np.array(u)[..., None] < cump, axis=-1) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.take_along_axis(np.moveaxis(mean, -2, 0), i[..., None], axis=0) - x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) + if broadcast: + x = np.random.randn(np.prod(size), self.dim) + x = x.reshape(-1, *self.shape[:-1], self.dim) + else: + x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) if self.diagonal: L = np.sqrt(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim)) @@ -478,7 +487,10 @@ def rvs(self, size=()): L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) L = np.take_along_axis(np.moveaxis(L, -3, 0), i[..., None, None], axis=0) rvs = mean + np.einsum("...ij,...j->...i", L, x) - return rvs.reshape(*size, *self.shape[:-1], self.dim) + if broadcast: + return rvs.reshape(*size, self.dim) + else: + return rvs.reshape(*size, *self.shape[:-1], self.dim) def condition(self, indices, values): """Condition on indices with values. From 082db82709ba2f51306533b5057b01ea4a4a170f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 18 Feb 2024 21:43:44 +0000 Subject: [PATCH 085/117] Fixed broadcasting tests --- lsbi/model.py | 6 +++++- lsbi/stats.py | 12 ++++++------ tests/test_model.py | 3 +-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index be94640..97c6d8c 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -458,7 +458,11 @@ def dkl(self, D, n=0): """ if n == 0: raise ValueError("MixtureModel requires a monte carlo estimate. 
Use n>0.") - return super().dkl(D, n) + + p = self.posterior(D) + q = self.prior() + x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) class ReducedLinearModel(object): diff --git a/lsbi/stats.py b/lsbi/stats.py index 3a414e0..bc931ea 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -463,20 +463,20 @@ def rvs(self, size=(), broadcast=False): """ if self.shape == (): return super().rvs(size=size, broadcast=broadcast) + if broadcast: + shape = np.broadcast_shapes(size, self.shape[:-1]) + else: + shape = (size, *self.shape[:-1]) size = np.atleast_1d(np.array(size, dtype=int)) logw = np.broadcast_to(self.logw, self.shape).copy() logw = logw - logsumexp(logw, axis=-1, keepdims=True) p = np.exp(logw) cump = np.cumsum(p, axis=-1) - u = np.random.rand(np.prod(size), *p.shape[:-1]) + u = np.random.rand(np.prod(shape)).reshape(-1, *p.shape[:-1]) i = np.argmax(np.array(u)[..., None] < cump, axis=-1) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.take_along_axis(np.moveaxis(mean, -2, 0), i[..., None], axis=0) - if broadcast: - x = np.random.randn(np.prod(size), self.dim) - x = x.reshape(-1, *self.shape[:-1], self.dim) - else: - x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) + x = np.random.randn(np.prod(shape)).reshape(-1, *self.shape[:-1], self.dim) if self.diagonal: L = np.sqrt(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim)) diff --git a/tests/test_model.py b/tests/test_model.py index 16fc9f1..8d85a72 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -618,8 +618,7 @@ def test_posterior( model.dkl(D) dkl = model.dkl(D, 10) - assert dkl.shape == model.shape - assert (dkl >= 0).all() + assert dkl.shape == model.shape[:-1] def test_evidence( self, From 4da731d97c5b427ca944ca6f82795e534506350a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Mon, 19 Feb 2024 17:26:48 +0000 Subject: [PATCH 086/117] Actually fixed broadcasting now --- lsbi/stats.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index bc931ea..2d280b6 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -463,20 +463,22 @@ def rvs(self, size=(), broadcast=False): """ if self.shape == (): return super().rvs(size=size, broadcast=broadcast) - if broadcast: - shape = np.broadcast_shapes(size, self.shape[:-1]) - else: - shape = (size, *self.shape[:-1]) size = np.atleast_1d(np.array(size, dtype=int)) logw = np.broadcast_to(self.logw, self.shape).copy() logw = logw - logsumexp(logw, axis=-1, keepdims=True) p = np.exp(logw) cump = np.cumsum(p, axis=-1) - u = np.random.rand(np.prod(shape)).reshape(-1, *p.shape[:-1]) + if broadcast: + u = np.random.rand(*size).reshape(-1, *p.shape[:-1]) + else: + u = np.random.rand(np.prod(size), *p.shape[:-1]) i = np.argmax(np.array(u)[..., None] < cump, axis=-1) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.take_along_axis(np.moveaxis(mean, -2, 0), i[..., None], axis=0) - x = np.random.randn(np.prod(shape)).reshape(-1, *self.shape[:-1], self.dim) + if broadcast: + x = np.random.randn(*size, self.dim) + else: + x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) if self.diagonal: L = np.sqrt(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim)) From b9362cba9d698d1158ffdf5fc06610d87a3aa09a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Mon, 19 Feb 2024 18:57:32 +0000 Subject: [PATCH 087/117] Removed in the end unecessary broadcasting code --- lsbi/model.py | 7 +------ 
lsbi/stats.py | 32 ++++++++------------------------ 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 97c6d8c..f0f7b6f 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -271,7 +271,6 @@ def dkl(self, D, n=0): n : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - return dkl(self.posterior(D), self.prior(), n) @property @@ -458,11 +457,7 @@ def dkl(self, D, n=0): """ if n == 0: raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") - - p = self.posterior(D) - q = self.prior() - x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) - return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) + return super().dkl(D, n) class ReducedLinearModel(object): diff --git a/lsbi/stats.py b/lsbi/stats.py index 2d280b6..b296b22 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -122,15 +122,13 @@ def pdf(self, x, broadcast=False): """ return np.exp(self.logpdf(x, broadcast=broadcast)) - def rvs(self, size=(), broadcast=False): + def rvs(self, size=()): """Draw random samples from the distribution. Parameters ---------- size : int or tuple of ints, optional, default=() Number of samples to draw. - broadcast : bool, optional, default=False - If True, broadcast x across the distribution parameters. Returns ------- @@ -138,10 +136,7 @@ def rvs(self, size=(), broadcast=False): Random samples from the distribution. """ size = np.atleast_1d(size) - if broadcast: - x = np.random.randn(*size, self.dim) - else: - x = np.random.randn(*size, *self.shape, self.dim) + x = np.random.randn(*size, *self.shape, self.dim) if self.diagonal: return self.mean + np.sqrt(self.cov) * x else: @@ -447,38 +442,30 @@ def pdf(self, x, broadcast=False, joint=False): """ return np.exp(self.logpdf(x, broadcast=broadcast, joint=joint)) - def rvs(self, size=(), broadcast=False): + def rvs(self, size=()): """Draw random samples from the distribution. Parameters ---------- size : int or tuple of ints, optional, default=1 Number of samples to draw. - broadcast : bool, optional, default=False - If True, broadcast x across the distribution parameters. 
Returns ------- rvs : array_like, shape `(*size, *shape[:-1], dim)` """ if self.shape == (): - return super().rvs(size=size, broadcast=broadcast) + return super().rvs(size=size) size = np.atleast_1d(np.array(size, dtype=int)) logw = np.broadcast_to(self.logw, self.shape).copy() logw = logw - logsumexp(logw, axis=-1, keepdims=True) p = np.exp(logw) cump = np.cumsum(p, axis=-1) - if broadcast: - u = np.random.rand(*size).reshape(-1, *p.shape[:-1]) - else: - u = np.random.rand(np.prod(size), *p.shape[:-1]) + u = np.random.rand(np.prod(size), *p.shape[:-1]) i = np.argmax(np.array(u)[..., None] < cump, axis=-1) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.take_along_axis(np.moveaxis(mean, -2, 0), i[..., None], axis=0) - if broadcast: - x = np.random.randn(*size, self.dim) - else: - x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) + x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) if self.diagonal: L = np.sqrt(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim)) @@ -489,10 +476,7 @@ def rvs(self, size=(), broadcast=False): L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) L = np.take_along_axis(np.moveaxis(L, -3, 0), i[..., None, None], axis=0) rvs = mean + np.einsum("...ij,...j->...i", L, x) - if broadcast: - return rvs.reshape(*size, self.dim) - else: - return rvs.reshape(*size, *self.shape[:-1], self.dim) + return rvs.reshape(*size, *self.shape[:-1], self.dim) def condition(self, indices, values): """Condition on indices with values. @@ -602,7 +586,7 @@ def dkl(p, q, n=0): """ shape = np.broadcast_shapes(p.shape, q.shape) if n: - x = p.rvs(size=(n, *shape), broadcast=True) + x = p.rvs(n) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) dkl = -p.dim * np.ones(shape) dkl = dkl + logdet(q.cov * np.ones(q.dim), q.diagonal) From 9e94da28f3f89a888ae5917502a14b7ac0ac8e9e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 20 Feb 2024 06:28:23 +0000 Subject: [PATCH 088/117] Re-instated rvs with broadcast=True --- lsbi/model.py | 6 +++++- lsbi/stats.py | 32 ++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index f0f7b6f..b5fc90e 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -457,7 +457,11 @@ def dkl(self, D, n=0): """ if n == 0: raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") - return super().dkl(D, n) + + p = self.posterior(D) + q = self.prior() + x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) class ReducedLinearModel(object): diff --git a/lsbi/stats.py b/lsbi/stats.py index b296b22..2d280b6 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -122,13 +122,15 @@ def pdf(self, x, broadcast=False): """ return np.exp(self.logpdf(x, broadcast=broadcast)) - def rvs(self, size=()): + def rvs(self, size=(), broadcast=False): """Draw random samples from the distribution. Parameters ---------- size : int or tuple of ints, optional, default=() Number of samples to draw. + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. Returns ------- @@ -136,7 +138,10 @@ def rvs(self, size=()): Random samples from the distribution. 
""" size = np.atleast_1d(size) - x = np.random.randn(*size, *self.shape, self.dim) + if broadcast: + x = np.random.randn(*size, self.dim) + else: + x = np.random.randn(*size, *self.shape, self.dim) if self.diagonal: return self.mean + np.sqrt(self.cov) * x else: @@ -442,30 +447,38 @@ def pdf(self, x, broadcast=False, joint=False): """ return np.exp(self.logpdf(x, broadcast=broadcast, joint=joint)) - def rvs(self, size=()): + def rvs(self, size=(), broadcast=False): """Draw random samples from the distribution. Parameters ---------- size : int or tuple of ints, optional, default=1 Number of samples to draw. + broadcast : bool, optional, default=False + If True, broadcast x across the distribution parameters. Returns ------- rvs : array_like, shape `(*size, *shape[:-1], dim)` """ if self.shape == (): - return super().rvs(size=size) + return super().rvs(size=size, broadcast=broadcast) size = np.atleast_1d(np.array(size, dtype=int)) logw = np.broadcast_to(self.logw, self.shape).copy() logw = logw - logsumexp(logw, axis=-1, keepdims=True) p = np.exp(logw) cump = np.cumsum(p, axis=-1) - u = np.random.rand(np.prod(size), *p.shape[:-1]) + if broadcast: + u = np.random.rand(*size).reshape(-1, *p.shape[:-1]) + else: + u = np.random.rand(np.prod(size), *p.shape[:-1]) i = np.argmax(np.array(u)[..., None] < cump, axis=-1) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.take_along_axis(np.moveaxis(mean, -2, 0), i[..., None], axis=0) - x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) + if broadcast: + x = np.random.randn(*size, self.dim) + else: + x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) if self.diagonal: L = np.sqrt(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim)) @@ -476,7 +489,10 @@ def rvs(self, size=()): L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) L = np.take_along_axis(np.moveaxis(L, -3, 0), i[..., None, None], axis=0) rvs = mean + np.einsum("...ij,...j->...i", L, x) - return rvs.reshape(*size, *self.shape[:-1], self.dim) + if broadcast: + return rvs.reshape(*size, self.dim) + else: + return rvs.reshape(*size, *self.shape[:-1], self.dim) def condition(self, indices, values): """Condition on indices with values. 
@@ -586,7 +602,7 @@ def dkl(p, q, n=0): """ shape = np.broadcast_shapes(p.shape, q.shape) if n: - x = p.rvs(n) + x = p.rvs(size=(n, *shape), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) dkl = -p.dim * np.ones(shape) dkl = dkl + logdet(q.cov * np.ones(q.dim), q.diagonal) From 6382422cb7567fc02fef73132811164246867a91 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 22 Feb 2024 13:18:52 +0000 Subject: [PATCH 089/117] Removed unusual conversion --- lsbi/stats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 2d280b6..562db6c 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -463,7 +463,7 @@ def rvs(self, size=(), broadcast=False): """ if self.shape == (): return super().rvs(size=size, broadcast=broadcast) - size = np.atleast_1d(np.array(size, dtype=int)) + size = np.atleast_1d(size) logw = np.broadcast_to(self.logw, self.shape).copy() logw = logw - logsumexp(logw, axis=-1, keepdims=True) p = np.exp(logw) @@ -471,14 +471,14 @@ def rvs(self, size=(), broadcast=False): if broadcast: u = np.random.rand(*size).reshape(-1, *p.shape[:-1]) else: - u = np.random.rand(np.prod(size), *p.shape[:-1]) + u = np.random.rand(np.prod(size, dtype=int), *p.shape[:-1]) i = np.argmax(np.array(u)[..., None] < cump, axis=-1) mean = np.broadcast_to(self.mean, (*self.shape, self.dim)) mean = np.take_along_axis(np.moveaxis(mean, -2, 0), i[..., None], axis=0) if broadcast: x = np.random.randn(*size, self.dim) else: - x = np.random.randn(np.prod(size), *self.shape[:-1], self.dim) + x = np.random.randn(np.prod(size, dtype=int), *self.shape[:-1], self.dim) if self.diagonal: L = np.sqrt(self.cov) L = np.broadcast_to(L, (*self.shape, self.dim)) From 75827e82a79e8623f2e3a43ab9c3b45ee29cfb14 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 23 Feb 2024 15:47:34 +0000 Subject: [PATCH 090/117] Added plotting functions --- lsbi/plot.py | 357 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_plot.py | 50 +++++++ 2 files changed, 407 insertions(+) create mode 100644 lsbi/plot.py create mode 100644 tests/test_plot.py diff --git a/lsbi/plot.py b/lsbi/plot.py new file mode 100644 index 0000000..729103b --- /dev/null +++ b/lsbi/plot.py @@ -0,0 +1,357 @@ +"""anesthetic-style plotting functions for distributions.""" +import matplotlib.pyplot as plt +import numpy as np +from anesthetic import make_1d_axes, make_2d_axes +from anesthetic.plot import basic_cmap, normalize_kwargs, quantile_plot_interval +from anesthetic.plot import scatter_plot_2d as anesthetic_scatter_plot_2d +from anesthetic.plot import set_colors +from anesthetic.utils import ( + iso_probability_contours_from_samples, + match_contour_to_contourf, +) +from matplotlib.colors import LinearSegmentedColormap + + +def pdf_plot_1d(ax, dist, *args, **kwargs): + """Plot a 1D probability density estimate. + + This is in the same style as anesthetic, but since we have analytic expressions for the marginal densities we can plot the pdf directly + + This functions as a wrapper around :meth:`matplotlib.axes.Axes.plot`, with + a kernel density estimation computation provided by + :class:`scipy.stats.gaussian_kde` in-between. All remaining keyword + arguments are passed onwards. + + Parameters + ---------- + ax: :class:`matplotlib.axes.Axes` + Axis object to plot on. 
+ + dist: statistical distribution to plot + This should have a `logpdf` method and a `rvs` method, operating on + one-dimensional inputs + + levels : list + Values at which to draw iso-probability lines. + Default: [0.95, 0.68] + + facecolor : bool or string, default=False + If set to True then the 1d plot will be shaded with the value of the + ``color`` kwarg. Set to a string such as 'blue', 'k', 'r', 'C1' ect. + to define the color of the shading directly. + + Returns + ------- + lines : :class:`matplotlib.lines.Line2D` + A list of line objects representing the plotted data (same as + :meth:`matplotlib.axes.Axes.plot` command). + """ + kwargs = normalize_kwargs(kwargs) + nplot = kwargs.get("nplot_1d", 10000) + + levels = kwargs.pop("levels", [0.95, 0.68]) + density = kwargs.pop("density", False) + + cmap = kwargs.pop("cmap", None) + color = kwargs.pop( + "color", + (ax._get_lines.get_next_color() if cmap is None else plt.get_cmap(cmap)(0.68)), + ) + facecolor = kwargs.pop("facecolor", False) + if "edgecolor" in kwargs: + edgecolor = kwargs.pop("edgecolor") + if edgecolor: + color = edgecolor + else: + edgecolor = color + + x = dist.rvs(nplot) + logpdf = dist.logpdf(x) + logpdfmin = np.sort(logpdf)[::-1][int(0.997 * nplot)] + x = np.squeeze(x) + i = np.argsort(x) + x = x[i] + logpdf = logpdf[i] + logpdf[logpdf < logpdfmin] = np.nan + if not density: + logpdf -= np.nanmax(logpdf) + pdf = np.exp(logpdf) + ans = ax.plot(x, pdf, color=color, *args, **kwargs) + + if facecolor and facecolor not in [None, "None", "none"]: + if facecolor is True: + facecolor = color + + c = iso_probability_contours_from_samples(pp, contours=levels) + cmap = basic_cmap(facecolor) + fill = [] + for j in range(len(c) - 1): + fill.append( + ax.fill_between( + x, pp, where=pp >= c[j], color=cmap(c[j]), edgecolor=edgecolor + ) + ) + + ans = ans, fill + + if density: + ax.set_ylim(bottom=0) + else: + ax.set_ylim(0, 1.1) + + return ans + + +def pdf_plot_2d(ax, dist, *args, **kwargs): + """Plot a 2d marginalised distribution as contours. + + This is in the same style as anesthetic, but since we have analytic expressions for the marginal densities we can plot the pdf directly + + This functions as a wrapper around :meth:`matplotlib.axes.Axes.contour` + and :meth:`matplotlib.axes.Axes.contourf` with a kernel density + estimation (KDE) computation provided by :class:`scipy.stats.gaussian_kde` + in-between. All remaining keyword arguments are passed onwards to both + functions. + + Parameters + ---------- + ax : :class:`matplotlib.axes.Axes` + Axis object to plot on. + + dist: statistical distribution to plot + This should have a `logpdf` method and a `rvs` method, operating on + two-dimensional inputs + + levels : list, optional + Amount of mass within each iso-probability contour. + Has to be ordered from outermost to innermost contour. + Default: [0.95, 0.68] + + nplot_2d : int, default=1000 + Number of plotting points to use. + + Returns + ------- + c : :class:`matplotlib.contour.QuadContourSet` + A set of contourlines or filled regions. 
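A usage sketch for these low-level axes plotters, following the calls exercised in tests/test_plot.py later in this series (the dim keyword on multivariate_normal is taken from those tests):

    import matplotlib.pyplot as plt
    from lsbi.stats import multivariate_normal
    from lsbi.plot import pdf_plot_1d, pdf_plot_2d

    fig, (ax1, ax2) = plt.subplots(1, 2)
    pdf_plot_1d(ax1, multivariate_normal())                  # 1d density as a line
    pdf_plot_1d(ax1, multivariate_normal(), facecolor=True)  # shaded iso-probability bands
    pdf_plot_2d(ax2, multivariate_normal(dim=2))             # filled 2d contours
    pdf_plot_2d(ax2, multivariate_normal(dim=2), facecolor=None, ec="k")  # contour lines only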
+ + """ + kwargs = normalize_kwargs( + kwargs, + dict( + linewidths=["linewidth", "lw"], + linestyles=["linestyle", "ls"], + color=["c"], + facecolor=["fc"], + edgecolor=["ec"], + ), + ) + + nplot = kwargs.pop("nplot_2d", 10000) + label = kwargs.pop("label", None) + zorder = kwargs.pop("zorder", 1) + levels = kwargs.pop("levels", [0.95, 0.68]) + + color = kwargs.pop("color", ax._get_lines.get_next_color()) + facecolor = kwargs.pop("facecolor", True) + edgecolor = kwargs.pop("edgecolor", None) + cmap = kwargs.pop("cmap", None) + facecolor, edgecolor, cmap = set_colors( + c=color, fc=facecolor, ec=edgecolor, cmap=cmap + ) + + x = dist.rvs(nplot) + P = dist.pdf(x) + levels = iso_probability_contours_from_samples(P, contours=levels) + y = np.atleast_1d(x[..., 1]) + x = np.atleast_1d(x[..., 0]) + + if facecolor not in [None, "None", "none"]: + linewidths = kwargs.pop("linewidths", 0.5) + contf = ax.tricontourf( + x, + y, + P, + levels=levels, + cmap=cmap, + zorder=zorder, + vmin=0, + vmax=P.max(), + *args, + **kwargs, + ) + contf.set_cmap(cmap) + ax.add_patch( + plt.Rectangle( + (0, 0), 0, 0, lw=2, label=label, fc=cmap(0.999), ec=cmap(0.32) + ) + ) + cmap = None + else: + linewidths = kwargs.pop("linewidths", plt.rcParams.get("lines.linewidth")) + contf = None + fc = "None" if cmap is None else cmap(0.999) + ec = edgecolor if cmap is None else cmap(0.32) + ax.add_patch(plt.Rectangle((0, 0), 0, 0, lw=2, label=label, fc=fc, ec=ec)) + + vmin, vmax = match_contour_to_contourf(levels, vmin=0, vmax=P.max()) + cont = ax.tricontour( + x, + y, + P, + levels=levels, + zorder=zorder, + vmin=vmin, + vmax=vmax, + linewidths=linewidths, + colors=edgecolor, + cmap=cmap, + *args, + **kwargs, + ) + + return contf, cont + + +def scatter_plot_2d(ax, dist, *args, **kwargs): + """Plot samples from a 2d marginalised distribution. + + This functions as a wrapper around :meth:`matplotlib.axes.Axes.plot`, + enforcing any prior bounds. All remaining keyword arguments are passed + onwards. + + Parameters + ---------- + ax : :class:`matplotlib.axes.Axes` + axis object to plot on + + dist: statistical distribution to plot + This should have a `logpdf` method and a `rvs` method, operating on + two-dimensional inputs + + Returns + ------- + lines : :class:`matplotlib.lines.Line2D` + A list of line objects representing the plotted data (same as + :meth:`matplotlib.axes.Axes.plot` command). + """ + kwargs = normalize_kwargs( + kwargs, + alias_mapping=dict( + lw=["linewidth", "linewidths"], + ls=["linestyle", "linestyles"], + color=["c"], + mfc=["fc", "facecolor"], + mec=["ec", "edgecolor"], + cmap=["colormap"], + ), + drop=["ls", "lw"], + ) + kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D) + + markersize = kwargs.pop("markersize", 1) + cmap = kwargs.pop("cmap", None) + color = kwargs.pop( + "color", (ax._get_lines.get_next_color() if cmap is None else cmap(0.68)) + ) + + kwargs.pop("q", None) + + N = 1000 + x = dist.rvs(N) + return anesthetic_scatter_plot_2d(x[:, 0], x[:, 1], *args, **kwargs) + + +def plot_1d(dist, axes=None, *args, **kwargs): + """Create an array of 1D plots. + + Parameters + ---------- + dist: statistical distribution to plot + This should have a `logpdf` method and a `rvs` method, operating on + one-dimensional inputs + + axes : plotting axes, optional + Can be: + + * list(str) or str + * :class:`pandas.Series` of :class:`matplotlib.axes.Axes` + + If a :class:`pandas.Series` is provided as an existing set of axes, + then this is used for creating the plot. 
Otherwise, a new set of + axes are created using the list or lists of strings. + + If not provided, then all parameters are plotted. This is intended + for plotting a sliced array (e.g. `samples[['x0','x1]].plot_1d()`. + + Returns + ------- + axes : :class:`pandas.Series` of :class:`matplotlib.axes.Axes` + Pandas array of axes objects + + """ + params = kwargs.pop("params", list(range(dist.dim))) + if axes is None: + fig, axes = make_1d_axes(params) + for i, ax in enumerate(axes): + d = dist.marginalise(list(set(params) - {i})) + pdf_plot_1d(ax, d, *args, **kwargs) + return axes + + +def plot_2d(dist, axes=None, *args, **kwargs): + """Create an array of 2D plots. + + To avoid interfering with y-axis sharing, one-dimensional plots are + created on a separate axis, which is monkey-patched onto the argument + ax as the attribute ax.twin. + + Parameters + ---------- + dist : statistical distribution to plot + This should have a `logpdf` method and a `rvs` method, operating on + two-dimensional inputs + + axes : plotting axes, optional + Can be: + - list(str) if the x and y axes are the same + - [list(str),list(str)] if the x and y axes are different + - :class:`pandas.DataFrame` of :class:`matplotlib.axes.Axes` + + If a :class:`pandas.DataFrame` is provided as an existing set of + axes, then this is used for creating the plot. Otherwise, a new set + of axes are created using the list or lists of strings. + + If not provided, then all parameters are plotted. This is intended + for plotting a sliced array (e.g. `samples[['x0','x1]].plot_2d()`. + It is not advisible to plot an entire frame, as it is + computationally expensive, and liable to run into linear algebra + errors for degenerate derived parameters. + + diagonal_kwargs, lower_kwargs, upper_kwargs : dict, optional + kwargs for the diagonal (1D)/lower or upper (2D) plots. This is + useful when there is a conflict of kwargs for different kinds of + plots. Note that any kwargs directly passed to plot_2d will + overwrite any kwarg with the same key passed to _kwargs. 
+ Default: {} + + Returns + ------- + axes : :class:`pandas.DataFrame` of :class:`matplotlib.axes.Axes` + Pandas array of axes objects + + """ + params = kwargs.pop("params", list(range(dist.dim))) + if axes is None: + fig, axes = make_2d_axes(params) + rvs = dist.rvs(1000).reshape(1000, -1, dist.dim) + for i, x in enumerate(axes.columns): + for j, y in enumerate(axes.index): + marg = dist.marginalise(list(set(params) - {i, j})) + if i == j: + pdf_plot_1d(axes.loc[x, x].twin, marg, *args, **kwargs) + elif i < j: + pdf_plot_2d(axes.loc[y, x], marg, *args, **kwargs) + else: + scatter_plot_2d(axes.loc[y, x], marg, *args, **kwargs) + return axes diff --git a/tests/test_plot.py b/tests/test_plot.py new file mode 100644 index 0000000..6d6e70a --- /dev/null +++ b/tests/test_plot.py @@ -0,0 +1,50 @@ +import matplotlib.pyplot as plt +import pytest +import scipy.stats + +import lsbi.stats +from lsbi.plot import pdf_plot_1d, pdf_plot_2d + + +@pytest.fixture(autouse=True) +def close_figures_on_teardown(): + yield + plt.close("all") + + +dists = [lsbi.stats.multivariate_normal(), scipy.stats.multivariate_normal()] + + +@pytest.mark.parametrize("dist", dists) +def test_pdf_plot_1d(dist): + fig, ax = plt.subplots() + pdf_plot_1d(ax, dist) + pdf_plot_1d(ax, dist, edgecolor="k") + pdf_plot_1d(ax, dist, facecolor=True) + pdf_plot_1d(ax, dist, density=True) + + +@pytest.mark.parametrize("dist", dists) +def test_pdf_plot_2d(dist): + dist = scipy.stats.multivariate_normal([0.1, 0.2]) + fig, ax = plt.subplots() + pdf_plot_2d(ax, dist) + pdf_plot_2d(ax, dist, facecolor=None, ec="k") + + +@pytest.mark.parametrize("dist", dists) +def test_scatter_plot_2d(dist): + fig, ax = plt.subplots() + scatter_plot_2d(ax, dist) + + +@pytest.mark.parametrize("dist", dists) +def test_plot_1d(dist): + fig, ax = plt.subplots() + plot_1d(dist, ax) + + +@pytest.mark.parametrize("dist", dists) +def test_plot_2d(dist): + fig, ax = plt.subplots() + plot_2d(dist, ax) From dc4922a5e81579beb2eec59a927cf21c90bf2e84 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 23 Feb 2024 17:12:10 +0000 Subject: [PATCH 091/117] Added a plot member function to stats --- lsbi/stats.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/lsbi/stats.py b/lsbi/stats.py index 4c75690..26189be 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -7,6 +7,7 @@ from numpy.linalg import cholesky, inv from scipy.special import erf, logsumexp +import lsbi.plot from lsbi.utils import bisect, logdet @@ -337,6 +338,28 @@ def __getitem__(self, arg): dist._dim = dist.mean.shape[-1] return dist + def plot_1d(self, axes=None, *args, **kwargs): # noqa: D102 + if self.shape: + return [ + self.plot_1d(self[i], axes=axes, *args, **kwargs) + for i in range(self.shape[0]) + ] + else: + return lsbi.stats.plot_2d(self, axes=axes, *args, **kwargs) + + def plot_2d(self, axes=None, *args, **kwargs): # noqa:D102 + if self.shape: + return [ + self.plot_2d(self[i], axes=axes, *args, **kwargs) + for i in range(self.shape[0]) + ] + else: + return lsbi.stats.plot_2d(self, axes=axes, *args, **kwargs) + + +multivariate_normal.plot_1d.__doc__ = lsbi.plot.plot_1d.__doc__ +multivariate_normal.plot_2d.__doc__ = lsbi.plot.plot_2d.__doc__ + class mixture_normal(multivariate_normal): """Mixture of multivariate normal distributions. 
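At the top level, plot_1d and plot_2d mirror anesthetic's interface. A usage sketch based on tests/test_plot.py (the dim keyword and the make_2d_axes re-export are taken from those tests):

    from lsbi.stats import multivariate_normal
    from lsbi.plot import make_2d_axes, plot_1d, plot_2d

    dist = multivariate_normal(dim=5)    # five-parameter standard normal
    axes = plot_1d(dist)                 # grid of 1d marginals
    axes = plot_2d(dist, [0, 2, 4])      # triangle plot over a subset of parameters

    fig, axes = make_2d_axes([0, 1, 2])  # or reuse a pre-made anesthetic axes grid
    plot_2d(dist, axes)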
@@ -564,3 +587,25 @@ def __getitem__(self, arg): # noqa: D105 dist.__class__ = mixture_normal dist.logA = np.broadcast_to(self.logA, self.shape)[arg] return dist + + def plot_1d(self, axes=None, *args, **kwargs): # noqa: D102 + if self.shape[:-1]: + return [ + self.plot_1d(self[i], axes=axes, *args, **kwargs) + for i in range(self.shape[0]) + ] + else: + return lsbi.stats.plot_2d(self, axes=axes, *args, **kwargs) + + def plot_2d(self, axes=None, *args, **kwargs): # noqa:D102 + if self.shape[:-1]: + return [ + self.plot_2d(self[i], axes=axes, *args, **kwargs) + for i in range(self.shape[0]) + ] + else: + return lsbi.stats.plot_2d(self, axes=axes, *args, **kwargs) + + +mixture_normal.plot_1d.__doc__ = lsbi.plot.plot_1d.__doc__ +mixture_normal.plot_2d.__doc__ = lsbi.plot.plot_2d.__doc__ From 7404e405966e7dc53bcdd7890903c934821139cb Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 23 Feb 2024 20:23:49 +0000 Subject: [PATCH 092/117] Updated plot and stats to be recursive --- lsbi/plot.py | 62 ++++++++++++++++++--------------------------------- lsbi/stats.py | 36 +++++++++++++----------------- 2 files changed, 38 insertions(+), 60 deletions(-) diff --git a/lsbi/plot.py b/lsbi/plot.py index 729103b..181aca9 100644 --- a/lsbi/plot.py +++ b/lsbi/plot.py @@ -1,4 +1,6 @@ """anesthetic-style plotting functions for distributions.""" +import matplotlib.cbook as cbook +import matplotlib.lines as mlines import matplotlib.pyplot as plt import numpy as np from anesthetic import make_1d_axes, make_2d_axes @@ -12,7 +14,7 @@ from matplotlib.colors import LinearSegmentedColormap -def pdf_plot_1d(ax, dist, *args, **kwargs): +def pdf_plot_1d(ax, dist, index=0, *args, **kwargs): """Plot a 1D probability density estimate. This is in the same style as anesthetic, but since we have analytic expressions for the marginal densities we can plot the pdf directly @@ -68,7 +70,7 @@ def pdf_plot_1d(ax, dist, *args, **kwargs): x = dist.rvs(nplot) logpdf = dist.logpdf(x) logpdfmin = np.sort(logpdf)[::-1][int(0.997 * nplot)] - x = np.squeeze(x) + x = np.atleast_2d(x)[..., index] i = np.argsort(x) x = x[i] logpdf = logpdf[i] @@ -102,7 +104,7 @@ def pdf_plot_1d(ax, dist, *args, **kwargs): return ans -def pdf_plot_2d(ax, dist, *args, **kwargs): +def pdf_plot_2d(ax, dist, index=[0, 1], *args, **kwargs): """Plot a 2d marginalised distribution as contours. This is in the same style as anesthetic, but since we have analytic expressions for the marginal densities we can plot the pdf directly @@ -163,8 +165,8 @@ def pdf_plot_2d(ax, dist, *args, **kwargs): x = dist.rvs(nplot) P = dist.pdf(x) levels = iso_probability_contours_from_samples(P, contours=levels) - y = np.atleast_1d(x[..., 1]) - x = np.atleast_1d(x[..., 0]) + y = np.atleast_1d(x[..., index[1]]) + x = np.atleast_1d(x[..., index[0]]) if facecolor not in [None, "None", "none"]: linewidths = kwargs.pop("linewidths", 0.5) @@ -213,7 +215,7 @@ def pdf_plot_2d(ax, dist, *args, **kwargs): return contf, cont -def scatter_plot_2d(ax, dist, *args, **kwargs): +def scatter_plot_2d(ax, dist, index=[0, 1], *args, **kwargs): """Plot samples from a 2d marginalised distribution. This functions as a wrapper around :meth:`matplotlib.axes.Axes.plot`, @@ -235,31 +237,11 @@ def scatter_plot_2d(ax, dist, *args, **kwargs): A list of line objects representing the plotted data (same as :meth:`matplotlib.axes.Axes.plot` command). 
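A corresponding sketch for the scatter variant, again following tests/test_plot.py (the dim keyword is assumed as in the other tests):

    import matplotlib.pyplot as plt
    from lsbi.stats import multivariate_normal
    from lsbi.plot import scatter_plot_2d

    fig, ax = plt.subplots()
    scatter_plot_2d(ax, multivariate_normal(dim=2))  # scatter of samples in the first two parameters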
""" - kwargs = normalize_kwargs( - kwargs, - alias_mapping=dict( - lw=["linewidth", "linewidths"], - ls=["linestyle", "linestyles"], - color=["c"], - mfc=["fc", "facecolor"], - mec=["ec", "edgecolor"], - cmap=["colormap"], - ), - drop=["ls", "lw"], - ) - kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D) - - markersize = kwargs.pop("markersize", 1) - cmap = kwargs.pop("cmap", None) - color = kwargs.pop( - "color", (ax._get_lines.get_next_color() if cmap is None else cmap(0.68)) - ) - - kwargs.pop("q", None) - - N = 1000 - x = dist.rvs(N) - return anesthetic_scatter_plot_2d(x[:, 0], x[:, 1], *args, **kwargs) + nplot = kwargs.pop("nplot_2d", 1000) + x = dist.rvs(nplot) + y = x[:, index[1]] + x = x[:, index[0]] + return anesthetic_scatter_plot_2d(ax, x, y, *args, **kwargs) def plot_1d(dist, axes=None, *args, **kwargs): @@ -345,13 +327,13 @@ def plot_2d(dist, axes=None, *args, **kwargs): if axes is None: fig, axes = make_2d_axes(params) rvs = dist.rvs(1000).reshape(1000, -1, dist.dim) - for i, x in enumerate(axes.columns): - for j, y in enumerate(axes.index): - marg = dist.marginalise(list(set(params) - {i, j})) - if i == j: - pdf_plot_1d(axes.loc[x, x].twin, marg, *args, **kwargs) - elif i < j: - pdf_plot_2d(axes.loc[y, x], marg, *args, **kwargs) - else: - scatter_plot_2d(axes.loc[y, x], marg, *args, **kwargs) + for y, row in axes.iterrows(): + for x, ax in row.items(): + marg = dist.marginalise(list(set(params) - {x, y})) + if ax.position == "diagonal": + pdf_plot_1d(ax.twin, marg, *args, **kwargs) + elif ax.position == "lower": + pdf_plot_2d(ax, marg, *args, **kwargs) + elif ax.position == "upper": + scatter_plot_2d(ax, marg, index=[1, 0], *args, **kwargs) return axes diff --git a/lsbi/stats.py b/lsbi/stats.py index 3be9066..a3fccda 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -345,21 +345,19 @@ def __getitem__(self, arg): def plot_1d(self, axes=None, *args, **kwargs): # noqa: D102 if self.shape: - return [ - self.plot_1d(self[i], axes=axes, *args, **kwargs) - for i in range(self.shape[0]) - ] + for i in range(self.shape[0]): + axes = self[i].plot_1d(axes, *args, **kwargs) else: - return lsbi.stats.plot_2d(self, axes=axes, *args, **kwargs) + return lsbi.plot.plot_1d(self, axes, *args, **kwargs) + return axes def plot_2d(self, axes=None, *args, **kwargs): # noqa:D102 if self.shape: - return [ - self.plot_2d(self[i], axes=axes, *args, **kwargs) - for i in range(self.shape[0]) - ] + for i in range(self.shape[0]): + axes = self[i].plot_2d(axes, *args, **kwargs) else: - return lsbi.stats.plot_2d(self, axes=axes, *args, **kwargs) + return lsbi.plot.plot_1d(self, axes, *args, **kwargs) + return axes multivariate_normal.plot_1d.__doc__ = lsbi.plot.plot_1d.__doc__ @@ -609,21 +607,19 @@ def __getitem__(self, arg): # noqa: D105 def plot_1d(self, axes=None, *args, **kwargs): # noqa: D102 if self.shape[:-1]: - return [ - self.plot_1d(self[i], axes=axes, *args, **kwargs) - for i in range(self.shape[0]) - ] + for i in range(self.shape[0]): + axes = self[i].plot_1d(axes=axes, *args, **kwargs) else: - return lsbi.stats.plot_2d(self, axes=axes, *args, **kwargs) + return lsbi.plot.plot_1d(self, axes=axes, *args, **kwargs) + return axes def plot_2d(self, axes=None, *args, **kwargs): # noqa:D102 if self.shape[:-1]: - return [ - self.plot_2d(self[i], axes=axes, *args, **kwargs) - for i in range(self.shape[0]) - ] + for i in range(self.shape[0]): + axes = self[i].plot_2d(axes=axes, *args, **kwargs) else: - return lsbi.stats.plot_2d(self, axes=axes, *args, **kwargs) + return lsbi.plot.plot_2d(self, 
axes=axes, *args, **kwargs) + return axes mixture_normal.plot_1d.__doc__ = lsbi.plot.plot_1d.__doc__ From 28f87e5d06a76430b9a6035473e47557c741cd73 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 28 Feb 2024 19:19:22 +0000 Subject: [PATCH 093/117] 1d->2d error --- lsbi/stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index a3fccda..b5410f8 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -356,7 +356,7 @@ def plot_2d(self, axes=None, *args, **kwargs): # noqa:D102 for i in range(self.shape[0]): axes = self[i].plot_2d(axes, *args, **kwargs) else: - return lsbi.plot.plot_1d(self, axes, *args, **kwargs) + return lsbi.plot.plot_2d(self, axes, *args, **kwargs) return axes From d2f73a1c2698547ebb512f0f2635f8217ccf7cd4 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 29 Feb 2024 12:07:02 +0000 Subject: [PATCH 094/117] updated black adjustment --- lsbi/plot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lsbi/plot.py b/lsbi/plot.py index 181aca9..aa8fce4 100644 --- a/lsbi/plot.py +++ b/lsbi/plot.py @@ -1,4 +1,5 @@ """anesthetic-style plotting functions for distributions.""" + import matplotlib.cbook as cbook import matplotlib.lines as mlines import matplotlib.pyplot as plt From c5d41adcc130d131f5375cfa0d9f7382b1e5c970 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 29 Feb 2024 13:32:22 +0000 Subject: [PATCH 095/117] Updated sphinx documentation --- docs/source/lsbi.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/source/lsbi.rst b/docs/source/lsbi.rst index 287a8e5..1bf9598 100644 --- a/docs/source/lsbi.rst +++ b/docs/source/lsbi.rst @@ -24,6 +24,17 @@ lsbi.network module :undoc-members: +lsbi.plot module +---------------- + +.. automodule:: lsbi.plot + :members: + :undoc-members: + :show-inheritance: + + + + lsbi.stats module ----------------- From e1add3fbce57902cb170d2919753f89bdb7bdab2 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 1 Mar 2024 11:08:23 +0000 Subject: [PATCH 096/117] Added anesthetic to plot optional dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f998f19..f958baf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ requires-python = ">=3.8" dependencies = [ 'numpy', 'scipy', - 'matplotlib', 'torch', ] classifiers = [ @@ -43,6 +42,7 @@ classifiers = [ [project.optional-dependencies] docs = ["sphinx", "sphinx_rtd_theme", "numpydoc"] test = ["pytest", "pytest-cov", "flake8", "pydocstyle", "packaging", "pre-commit"] +plot = ["anesthetic"] [tool.setuptools.dynamic] version = {attr = "lsbi._version.__version__"} From 5efce71e6441a76d4a99376a4c013c2516d872c0 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 1 Mar 2024 11:09:34 +0000 Subject: [PATCH 097/117] Added anesthetic as a dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f958baf..3af70eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,9 @@ requires-python = ">=3.8" dependencies = [ 'numpy', 'scipy', + 'matplotlib', 'torch', + 'anesthetic', ] classifiers = [ "Programming Language :: Python :: 3", @@ -42,7 +44,6 @@ classifiers = [ [project.optional-dependencies] docs = ["sphinx", "sphinx_rtd_theme", "numpydoc"] test = ["pytest", "pytest-cov", "flake8", "pydocstyle", "packaging", "pre-commit"] -plot = ["anesthetic"] [tool.setuptools.dynamic] version = {attr = "lsbi._version.__version__"} From 
8cc25cca1ab29ea5df13598e9237e34af71f174e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 1 Mar 2024 23:12:41 +0000 Subject: [PATCH 098/117] tests now available --- lsbi/plot.py | 44 +++++++++++++++++++++++++------------------- lsbi/stats.py | 22 ++++++++++++++++------ tests/test_plot.py | 27 ++++++++++++++++++++++----- 3 files changed, 63 insertions(+), 30 deletions(-) diff --git a/lsbi/plot.py b/lsbi/plot.py index aa8fce4..c72f06e 100644 --- a/lsbi/plot.py +++ b/lsbi/plot.py @@ -5,7 +5,13 @@ import matplotlib.pyplot as plt import numpy as np from anesthetic import make_1d_axes, make_2d_axes -from anesthetic.plot import basic_cmap, normalize_kwargs, quantile_plot_interval +from anesthetic.plot import ( + AxesDataFrame, + AxesSeries, + basic_cmap, + normalize_kwargs, + quantile_plot_interval, +) from anesthetic.plot import scatter_plot_2d as anesthetic_scatter_plot_2d from anesthetic.plot import set_colors from anesthetic.utils import ( @@ -15,7 +21,7 @@ from matplotlib.colors import LinearSegmentedColormap -def pdf_plot_1d(ax, dist, index=0, *args, **kwargs): +def pdf_plot_1d(ax, dist, *args, **kwargs): """Plot a 1D probability density estimate. This is in the same style as anesthetic, but since we have analytic expressions for the marginal densities we can plot the pdf directly @@ -71,7 +77,7 @@ def pdf_plot_1d(ax, dist, index=0, *args, **kwargs): x = dist.rvs(nplot) logpdf = dist.logpdf(x) logpdfmin = np.sort(logpdf)[::-1][int(0.997 * nplot)] - x = np.atleast_2d(x)[..., index] + x = np.atleast_2d(x)[..., 0] i = np.argsort(x) x = x[i] logpdf = logpdf[i] @@ -105,7 +111,7 @@ def pdf_plot_1d(ax, dist, index=0, *args, **kwargs): return ans -def pdf_plot_2d(ax, dist, index=[0, 1], *args, **kwargs): +def pdf_plot_2d(ax, dist, *args, **kwargs): """Plot a 2d marginalised distribution as contours. This is in the same style as anesthetic, but since we have analytic expressions for the marginal densities we can plot the pdf directly @@ -166,8 +172,8 @@ def pdf_plot_2d(ax, dist, index=[0, 1], *args, **kwargs): x = dist.rvs(nplot) P = dist.pdf(x) levels = iso_probability_contours_from_samples(P, contours=levels) - y = np.atleast_1d(x[..., index[1]]) - x = np.atleast_1d(x[..., index[0]]) + y = np.atleast_1d(x[..., 1]) + x = np.atleast_1d(x[..., 0]) if facecolor not in [None, "None", "none"]: linewidths = kwargs.pop("linewidths", 0.5) @@ -216,7 +222,7 @@ def pdf_plot_2d(ax, dist, index=[0, 1], *args, **kwargs): return contf, cont -def scatter_plot_2d(ax, dist, index=[0, 1], *args, **kwargs): +def scatter_plot_2d(ax, dist, *args, **kwargs): """Plot samples from a 2d marginalised distribution. 
This functions as a wrapper around :meth:`matplotlib.axes.Axes.plot`, @@ -240,8 +246,8 @@ def scatter_plot_2d(ax, dist, index=[0, 1], *args, **kwargs): """ nplot = kwargs.pop("nplot_2d", 1000) x = dist.rvs(nplot) - y = x[:, index[1]] - x = x[:, index[0]] + y = x[:, 1] + x = x[:, 0] return anesthetic_scatter_plot_2d(ax, x, y, *args, **kwargs) @@ -273,11 +279,12 @@ def plot_1d(dist, axes=None, *args, **kwargs): Pandas array of axes objects """ - params = kwargs.pop("params", list(range(dist.dim))) if axes is None: - fig, axes = make_1d_axes(params) + axes = list(range(dist.dim)) + if not isinstance(axes, AxesSeries): + fig, axes = make_1d_axes(axes) for i, ax in enumerate(axes): - d = dist.marginalise(list(set(params) - {i})) + d = dist[i] pdf_plot_1d(ax, d, *args, **kwargs) return axes @@ -324,17 +331,16 @@ def plot_2d(dist, axes=None, *args, **kwargs): Pandas array of axes objects """ - params = kwargs.pop("params", list(range(dist.dim))) if axes is None: - fig, axes = make_2d_axes(params) - rvs = dist.rvs(1000).reshape(1000, -1, dist.dim) + axes = list(range(dist.dim)) + if not isinstance(axes, AxesDataFrame): + fig, axes = make_2d_axes(axes) for y, row in axes.iterrows(): for x, ax in row.items(): - marg = dist.marginalise(list(set(params) - {x, y})) if ax.position == "diagonal": - pdf_plot_1d(ax.twin, marg, *args, **kwargs) + pdf_plot_1d(ax.twin, dist[x], *args, **kwargs) elif ax.position == "lower": - pdf_plot_2d(ax, marg, *args, **kwargs) + pdf_plot_2d(ax, dist[[x, y]], *args, **kwargs) elif ax.position == "upper": - scatter_plot_2d(ax, marg, index=[1, 0], *args, **kwargs) + scatter_plot_2d(ax, dist[[x, y]], *args, **kwargs) return axes diff --git a/lsbi/stats.py b/lsbi/stats.py index b5410f8..9258120 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -334,13 +334,23 @@ def __getitem__(self, arg): (2,) """ dist = deepcopy(self) - dist.mean = np.broadcast_to(self.mean, (*self.shape, self.dim))[arg] - if self.diagonal: - dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim))[arg] + if self.shape == (): + dist.mean = (np.ones(self.dim) * self.mean)[..., arg] + if self.diagonal: + dist.cov = (np.ones(self.dim) * self.cov)[..., arg] + else: + dist.cov = self.cov[..., arg, :][..., arg] + dist._dim = len(np.atleast_1d(dist.mean)) else: - dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim, self.dim))[arg] - dist._shape = dist.mean.shape[:-1] - dist._dim = dist.mean.shape[-1] + dist.mean = np.broadcast_to(self.mean, (*self.shape, self.dim))[arg] + if dist.diagonal: + dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim))[arg] + else: + dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim, self.dim))[ + arg + ] + dist._shape = dist.mean.shape[:-1] + dist._dim = dist.mean.shape[-1] return dist def plot_1d(self, axes=None, *args, **kwargs): # noqa: D102 diff --git a/tests/test_plot.py b/tests/test_plot.py index 6d6e70a..92d63a3 100644 --- a/tests/test_plot.py +++ b/tests/test_plot.py @@ -3,7 +3,15 @@ import scipy.stats import lsbi.stats -from lsbi.plot import pdf_plot_1d, pdf_plot_2d +from lsbi.plot import ( + make_1d_axes, + make_2d_axes, + pdf_plot_1d, + pdf_plot_2d, + plot_1d, + plot_2d, + scatter_plot_2d, +) @pytest.fixture(autouse=True) @@ -12,7 +20,11 @@ def close_figures_on_teardown(): plt.close("all") -dists = [lsbi.stats.multivariate_normal(), scipy.stats.multivariate_normal()] +dists = [ + lsbi.stats.multivariate_normal(), + lsbi.stats.multivariate_normal(dim=5), + scipy.stats.multivariate_normal(), +] @pytest.mark.parametrize("dist", dists) @@ -26,7 +38,6 
@@ def test_pdf_plot_1d(dist): @pytest.mark.parametrize("dist", dists) def test_pdf_plot_2d(dist): - dist = scipy.stats.multivariate_normal([0.1, 0.2]) fig, ax = plt.subplots() pdf_plot_2d(ax, dist) pdf_plot_2d(ax, dist, facecolor=None, ec="k") @@ -40,11 +51,17 @@ def test_scatter_plot_2d(dist): @pytest.mark.parametrize("dist", dists) def test_plot_1d(dist): - fig, ax = plt.subplots() + plot_1d(dist) + if dist.dim > 1: + plot_1d(dist, [0, 2, 4]) + fig, ax = make_1d_axes(list(range(dist.dim))) plot_1d(dist, ax) @pytest.mark.parametrize("dist", dists) def test_plot_2d(dist): - fig, ax = plt.subplots() + plot_2d(dist) + if dist.dim > 1: + plot_2d(dist, [0, 2, 4]) + fig, ax = make_2d_axes(list(range(dist.dim))) plot_2d(dist, ax) From ea8e6e0bdf22f10a0e0d47de547f8c890e4c09c9 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Fri, 1 Mar 2024 23:24:28 +0000 Subject: [PATCH 099/117] Restricted tests to valid sections --- lsbi/plot.py | 4 ++-- tests/test_plot.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lsbi/plot.py b/lsbi/plot.py index c72f06e..22cd505 100644 --- a/lsbi/plot.py +++ b/lsbi/plot.py @@ -91,13 +91,13 @@ def pdf_plot_1d(ax, dist, *args, **kwargs): if facecolor is True: facecolor = color - c = iso_probability_contours_from_samples(pp, contours=levels) + c = iso_probability_contours_from_samples(pdf, contours=levels) cmap = basic_cmap(facecolor) fill = [] for j in range(len(c) - 1): fill.append( ax.fill_between( - x, pp, where=pp >= c[j], color=cmap(c[j]), edgecolor=edgecolor + x, pdf, where=pdf >= c[j], color=cmap(c[j]), edgecolor=edgecolor ) ) diff --git a/tests/test_plot.py b/tests/test_plot.py index 92d63a3..1de45f5 100644 --- a/tests/test_plot.py +++ b/tests/test_plot.py @@ -36,20 +36,20 @@ def test_pdf_plot_1d(dist): pdf_plot_1d(ax, dist, density=True) -@pytest.mark.parametrize("dist", dists) +@pytest.mark.parametrize("dist", dists[1:2]) def test_pdf_plot_2d(dist): fig, ax = plt.subplots() pdf_plot_2d(ax, dist) pdf_plot_2d(ax, dist, facecolor=None, ec="k") -@pytest.mark.parametrize("dist", dists) +@pytest.mark.parametrize("dist", dists[1:2]) def test_scatter_plot_2d(dist): fig, ax = plt.subplots() scatter_plot_2d(ax, dist) -@pytest.mark.parametrize("dist", dists) +@pytest.mark.parametrize("dist", dists[:2]) def test_plot_1d(dist): plot_1d(dist) if dist.dim > 1: @@ -58,7 +58,7 @@ def test_plot_1d(dist): plot_1d(dist, ax) -@pytest.mark.parametrize("dist", dists) +@pytest.mark.parametrize("dist", dists[:2]) def test_plot_2d(dist): plot_2d(dist) if dist.dim > 1: From e468392aab6321d04e534f555a00412f6700e419 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 5 Mar 2024 22:31:49 +0000 Subject: [PATCH 100/117] Halfway through stats --- tests/test_stats.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_stats.py b/tests/test_stats.py index 6f957a4..b633250 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -95,6 +95,28 @@ def random(self, dim, shape, mean_shape, cov_shape, diagonal): def test_getitem(self, dim, shape, mean_shape, cov_shape, diagonal): dist = self.random(dim, shape, mean_shape, cov_shape, diagonal) + if len(dist.shape) == 0: + dist_2 = dist[0] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == () + assert dist_2.dim == 1 + + if dist.dim > 1: + dist_2 = dist[[0, 1]] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == () + assert dist_2.dim == 2 + + if dist.dim > 2: + dist_2 = dist[[0, 2]] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == () + assert 
dist_2.dim == 2 + dist_2 = dist[:2] + assert isinstance(dist_2, self.cls) + assert dist_2.shape == () + assert dist_2.dim == 2 + if len(dist.shape) > 0: dist_2 = dist[0] assert isinstance(dist_2, self.cls) From 6127eb54d65a84478bccd7f59849716b810782c8 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 5 Mar 2024 23:21:15 +0000 Subject: [PATCH 101/117] implemented Bayesian model dimensionality --- lsbi/model.py | 44 +++++++++++++++++++++++++++- lsbi/stats.py | 70 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_model.py | 10 +++++++ tests/test_stats.py | 37 +++++++++++++++++++++++- 4 files changed, 159 insertions(+), 2 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index b5fc90e..a2637d7 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -6,7 +6,7 @@ from numpy.linalg import inv, solve from scipy.special import logsumexp -from lsbi.stats import dkl, mixture_normal, multivariate_normal +from lsbi.stats import bmd, dkl, mixture_normal, multivariate_normal from lsbi.utils import alias, dediagonalise, logdet @@ -264,6 +264,10 @@ def ppd(self, D0): def dkl(self, D, n=0): """KL divergence between the posterior and prior. + Analytically this is + + 1/2 (log|1 + M Σ M'/ C| + tr(Σ_P/Σ) + Parameters ---------- D : array_like, shape (..., d) @@ -273,6 +277,26 @@ def dkl(self, D, n=0): """ return dkl(self.posterior(D), self.prior(), n) + def bmd(self, D, n=0): + """Bayesian model dimensionality. + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the posterior + n : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + """ + return bmd(self.posterior(D), self.prior(), n) + + def mutual_information(self): + """Mutual information. + + Analytically this is + + """ + return inv(self.posterior(D).cov) + @property def _M(self): return dediagonalise(self.M, self.diagonal_M, self.d, self.n) @@ -463,6 +487,24 @@ def dkl(self, D, n=0): x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) + def bmd(self, D, n=0): + """Bayesian model dimensionality. + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the posterior + n : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + """ + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + + p = self.posterior(D) + q = self.prior() + x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) + class ReducedLinearModel(object): """A model with no data. diff --git a/lsbi/stats.py b/lsbi/stats.py index 562db6c..b584c6b 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -588,6 +588,11 @@ def __getitem__(self, arg): # noqa: D105 def dkl(p, q, n=0): """Kullback-Leibler divergence between two distributions. + if P ~ N(p,P) and Q ~ N(q,Q) then + + D_KL(P||Q) = _P + = 1/2 * (log(|Q|/|P|) - d + tr(Q^{-1} P) + (q - p)' Q^{-1} (q - p)) + Parameters ---------- p : lsbi.stats.multivariate_normal @@ -623,3 +628,68 @@ def dkl(p, q, n=0): dkl = dkl + np.einsum("...ij,...ji->...", invq, p.cov) return dkl / 2 + + +def bmd(p, q, n=0): + """Bayesian model dimensionality between two distributions. 
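At the model level the same divergences are exposed as methods. A minimal sketch, assuming the keyword-argument LinearModel constructor (M, m, C, mu, Sigma, n, d) that the test suite's random helper is built on; those keyword names are an assumption here:

    import numpy as np
    from lsbi.model import LinearModel

    # d=5 data dimensions, n=3 parameters; constructor keywords assumed as above
    model = LinearModel(M=np.random.rand(5, 3), m=0, C=0.1, mu=0, Sigma=1, n=3, d=5)
    D = model.evidence().rvs()     # simulate a dataset from the model

    dkl = model.dkl(D)             # analytic KL divergence, posterior || prior
    dkl_mc = model.dkl(D, n=1000)  # Monte Carlo estimate from 1000 posterior samples
    bmd = model.bmd(D)             # analytic Bayesian model dimensionality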
+ + if P ~ N(p,P) and Q ~ N(q,Q) then + + bmd/2 = var(log P/Q)_P + = 1/2 tr(Q^{-1} P Q^{-1} P) - 1/2 (tr(Q^{-1} P))^2 + + (q - p)' Q^{-1} P Q^{-1} (q - p) + d/2 + + Parameters + ---------- + p : lsbi.stats.multivariate_normal + q : lsbi.stats.multivariate_normal + n : int, optional, default=0 + Number of samples to mcmc estimate the divergence. + + Returns + ------- + bmd : array_like + Bayesian model dimensionality between p and q. + """ + shape = np.broadcast_shapes(p.shape, q.shape) + if n: + x = p.rvs(size=(n, *shape), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var( + axis=0 + ) * 2 + + bmd = p.dim / 2 * np.ones(shape) + pq = (p.mean - q.mean) * np.ones(p.dim) + if p.diagonal and q.diagonal: + pcov = p.cov * np.ones(p.dim) + qcov = q.cov * np.ones(q.dim) + bmd = bmd + (pq**2 * pcov / qcov**2).sum(axis=-1) + bmd = bmd + (pcov**2 / qcov**2).sum(axis=-1) / 2 + bmd = bmd - (pcov / qcov).sum(axis=-1) + elif p.diagonal: + invq = inv(q.cov) + pcov = p.cov * np.ones(p.dim) + bmd = bmd + np.einsum( + "...i,...ij,...j,...jl,...l->...", pq, invq, pcov, invq, pq + ) + bmd = bmd - np.einsum("...jj,...j->...", invq, pcov) + bmd = bmd + np.einsum("...lj,...j,...jl,...l->...", invq, pcov, invq, pcov) / 2 + elif q.diagonal: + invq = np.ones(q.dim) / q.cov + pcov = p.cov * np.ones(p.dim) + bmd = bmd + np.einsum( + "...i,...i,...ik,...k,...k->...", pq, invq, pcov, invq, pq + ) + bmd = bmd - np.einsum("...j,...jj->...", invq, pcov) + bmd = bmd + np.einsum("...j,...jk,...k,...kj->...", invq, pcov, invq, pcov) / 2 + else: + invq = inv(q.cov) + bmd = bmd + np.einsum( + "...i,...ij,...jk,...kl,...l->...", pq, invq, p.cov, invq, pq + ) + bmd = bmd - np.einsum("...ij,...ji->...", invq, p.cov) + bmd = ( + bmd + + np.einsum("...ij,...jk,...kl,...li->...", invq, p.cov, invq, p.cov) / 2 + ) + return bmd * 2 diff --git a/tests/test_model.py b/tests/test_model.py index 8d85a72..fe9c657 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -265,6 +265,10 @@ def test_posterior( assert dkl.shape == model.shape assert (dkl >= 0).all() + bmd = model.bmd(D) + assert bmd.shape == model.shape + assert (bmd >= 0).all() + def test_evidence( self, M_shape, @@ -620,6 +624,12 @@ def test_posterior( dkl = model.dkl(D, 10) assert dkl.shape == model.shape[:-1] + with pytest.raises(ValueError): + model.bmd(D) + + bmd = model.bmd(D, 10) + assert bmd.shape == model.shape[:-1] + def test_evidence( self, logw_shape, diff --git a/tests/test_stats.py b/tests/test_stats.py index 6f957a4..f4841ad 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -5,7 +5,7 @@ from scipy.special import logsumexp from scipy.stats import multivariate_normal as scipy_multivariate_normal -from lsbi.stats import dkl, mixture_normal, multivariate_normal +from lsbi.stats import bmd, dkl, mixture_normal, multivariate_normal shapes = [(2, 3), (3,), ()] sizes = [(6, 5), (5,), ()] @@ -582,3 +582,38 @@ def test_dkl( assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) assert_allclose(dkl_pq, dkl_mc, atol=1) + + +@pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) +@pytest.mark.parametrize("dim_q, shape_q, mean_shape_q, cov_shape_q, diagonal_q", tests) +def test_bmd( + dim_p, + shape_p, + mean_shape_p, + cov_shape_p, + diagonal_p, + dim_q, + shape_q, + mean_shape_q, + cov_shape_q, + diagonal_q, +): + p = TestMultivariateNormal().random( + dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p + ) + q = TestMultivariateNormal().random( + dim, shape_q, mean_shape_q, 
cov_shape_q, diagonal_q + ) + + bmd_pq = bmd(p, q) + + assert_allclose(bmd(p, p), 0, atol=1e-10) + assert_allclose(bmd(q, q), 0, atol=1e-10) + + assert (bmd_pq >= 0).all() + assert bmd_pq.shape == np.broadcast_shapes(p.shape, q.shape) + + bmd_mc = bmd(p, q, 10000) + assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) + + assert_allclose(bmd_pq, bmd_mc, atol=1) From d28419e71e4fd321a850b3601925b92bd30e975f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 00:02:50 +0000 Subject: [PATCH 102/117] Added mutual information and total dimensionality --- lsbi/model.py | 65 ++++++++++++++++++++++++++++++++++++++++++--- tests/test_model.py | 32 ++++++++++++++++++++++ tests/test_stats.py | 4 +-- 3 files changed, 95 insertions(+), 6 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index a2637d7..e40cbe4 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -266,7 +266,9 @@ def dkl(self, D, n=0): Analytically this is - 1/2 (log|1 + M Σ M'/ C| + tr(Σ_P/Σ) + _P(θ|D) + + 1/2 (log|1 + M Σ M'/ C| - tr M Σ M'/ (C+M Σ M') + (μ - μ_P)' Σ^-1 (μ - μ_P)) Parameters ---------- @@ -280,6 +282,11 @@ def dkl(self, D, n=0): def bmd(self, D, n=0): """Bayesian model dimensionality. + Analytically this is + bmd/2 = var(log P(θ|D)/P(θ))_P(θ|D) + + = 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + (μ - μ_P)' Σ^-1 Σ_P Σ^-1(μ - μ_P) + Parameters ---------- D : array_like, shape (..., d) @@ -289,13 +296,51 @@ def bmd(self, D, n=0): """ return bmd(self.posterior(D), self.prior(), n) - def mutual_information(self): - """Mutual information. + def mutual_information(self, n=0): + """Mutual information between the parameters and the data. Analytically this is + _P(D|θ) + + = log|1 + M Σ M'/ C|/2 """ - return inv(self.posterior(D).cov) + if n > 0: + D = self.evidence().rvs(n) + θ = self.posterior(D).rvs(n, broadcast=True) + return ( + self.posterior(D).logpdf(θ, broadcast=True) + - self.prior().logpdf(θ, broadcast=True) + ).mean(axis=0) + + MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) + C = self._C + return ( + logdet(C + np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M)) + / 2 + - logdet(C) / 2 + ) + + def dimensionality(self): + """Model dimensionality. + + Analytically this is + + bmd/2 = _P(D) + + = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + """ + if n > 0: + n = int(n**0.5) + D = self.evidence().rvs(n) + θ = self.posterior(D).rvs(n) + logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(θ) + return logR.var(axis=0).mean(axis=0) * 2 + MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) + C = self._C + return 2 * np.trace(MΣM @ inv(C + MΣM)) - np.trace( + MΣM @ inv(C + MΣM) @ MΣM @ inv(C + MΣM) + ) @property def _M(self): @@ -505,6 +550,18 @@ def bmd(self, D, n=0): x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) + def mutual_information(self, n=0): + """Mutual information between the parameters and the data.""" + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + return super().mutual_information(n) + + def dimensionality(self): + """Model dimensionality.""" + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + return super().dimensionality(n) + class ReducedLinearModel(object): """A model with no data. 
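As a rough orientation for readers of this patch, the intended usage of the new information-theoretic methods can be sketched as follows. This is illustrative only and not part of the diff; it assumes a one-dimensional LinearModel with scalar keyword arguments, and the sample-count argument for switching between the analytic result and a Monte Carlo estimate as the interface settles later in this series:

    import numpy as np
    from lsbi.model import LinearModel

    model = LinearModel(M=2.0, C=1.0, mu=0.0, Sigma=1.0, n=1, d=1)
    D = model.evidence().rvs()              # one simulated data realisation

    print(model.dkl(D))                     # KL(posterior || prior) for this D
    print(model.bmd(D))                     # Bayesian model dimensionality for this D

    # data-averaged summaries: analytic versus Monte Carlo
    print(model.mutual_information())       # 0.5 * log(1 + M**2 * Sigma / C) ~ 0.80
    print(model.mutual_information(10000))  # Monte Carlo estimate of the same
    print(model.dimensionality())           # posterior-averaged BMD over the evidence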
diff --git a/tests/test_model.py b/tests/test_model.py index fe9c657..0f77768 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -269,6 +269,26 @@ def test_posterior( assert bmd.shape == model.shape assert (bmd >= 0).all() + mutual_information = model.mutual_information() + assert mutual_information.shape == model.shape + assert (mutual_information >= 0).all() + + N = 1000 + mutual_information_mc = model.mutual_information_mc(N) + assert mutual_information_mc.shape == model.shape + assert (mutual_information_mc >= 0).all() + assert_allclose(mutual_information, mutual_information_mc, rtol=1) + + dimensionality = model.dimensionality() + assert dimensionality.shape == model.shape + assert (dimensionality >= 0).all() + + N = 1000 + dimensionality_mc = model.dimensionality_mc(N) + assert dimensionality_mc.shape == model.shape + assert (dimensionality_mc >= 0).all() + assert_allclose(dimensionality, dimensionality_mc, rtol=1) + def test_evidence( self, M_shape, @@ -630,6 +650,18 @@ def test_posterior( bmd = model.bmd(D, 10) assert bmd.shape == model.shape[:-1] + with pytest.raises(ValueError): + model.mutual_information() + + mutual_information = model.mutual_information(10) + assert mutual_information.shape == model.shape[:-1] + + with pytest.raises(ValueError): + model.mutual_information() + + mutual_information_mc = model.mutual_information_mc(10) + assert mutual_information_mc.shape == model.shape[:-1] + def test_evidence( self, logw_shape, diff --git a/tests/test_stats.py b/tests/test_stats.py index f4841ad..37286b3 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -613,7 +613,7 @@ def test_bmd( assert (bmd_pq >= 0).all() assert bmd_pq.shape == np.broadcast_shapes(p.shape, q.shape) - bmd_mc = bmd(p, q, 10000) + bmd_mc = bmd(p, q, 1000) assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(bmd_pq, bmd_mc, atol=1) + assert_allclose(bmd_pq, bmd_mc, rtol=1) From 99b4f46ee8950478de140a1c1ddebf3c2d592291 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 00:11:02 +0000 Subject: [PATCH 103/117] Updated model dimensionality for tests --- lsbi/model.py | 10 ++++++---- tests/test_model.py | 13 ++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index e40cbe4..6d0c9d0 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -306,12 +306,14 @@ def mutual_information(self, n=0): = log|1 + M Σ M'/ C|/2 """ if n > 0: + n = int(n**0.5) D = self.evidence().rvs(n) - θ = self.posterior(D).rvs(n, broadcast=True) + θ = self.posterior(D).rvs(n) return ( - self.posterior(D).logpdf(θ, broadcast=True) - - self.prior().logpdf(θ, broadcast=True) - ).mean(axis=0) + (self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(θ)) + .mean(axis=0) + .mean(axis=0) + ) MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C diff --git a/tests/test_model.py b/tests/test_model.py index 0f77768..196923a 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -273,8 +273,8 @@ def test_posterior( assert mutual_information.shape == model.shape assert (mutual_information >= 0).all() - N = 1000 - mutual_information_mc = model.mutual_information_mc(N) + n = 1000 + mutual_information_mc = model.mutual_information(n) assert mutual_information_mc.shape == model.shape assert (mutual_information_mc >= 0).all() assert_allclose(mutual_information, mutual_information_mc, rtol=1) @@ -283,8 +283,7 @@ def test_posterior( assert dimensionality.shape == model.shape assert (dimensionality >= 
0).all() - N = 1000 - dimensionality_mc = model.dimensionality_mc(N) + dimensionality_mc = model.dimensionality(n) assert dimensionality_mc.shape == model.shape assert (dimensionality_mc >= 0).all() assert_allclose(dimensionality, dimensionality_mc, rtol=1) @@ -657,10 +656,10 @@ def test_posterior( assert mutual_information.shape == model.shape[:-1] with pytest.raises(ValueError): - model.mutual_information() + model.dimensionality() - mutual_information_mc = model.mutual_information_mc(10) - assert mutual_information_mc.shape == model.shape[:-1] + dimensionality = model.dimensionality(10) + assert dimensionality.shape == model.shape[:-1] def test_evidence( self, From 9a15e22c8e03408bdd0d6212966a35b54c9837b2 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 08:19:26 +0000 Subject: [PATCH 104/117] Tests now passing --- lsbi/model.py | 87 ++++++++++++++++++++++++--------------------- tests/test_model.py | 10 +++--- 2 files changed, 50 insertions(+), 47 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 6d0c9d0..be6c9e8 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -261,7 +261,7 @@ def ppd(self, D0): """P(D|D0) as a distribution object.""" return self.update(D0).evidence() - def dkl(self, D, n=0): + def dkl(self, D, N=0): """KL divergence between the posterior and prior. Analytically this is @@ -274,12 +274,12 @@ def dkl(self, D, n=0): ---------- D : array_like, shape (..., d) Data to form the posterior - n : int, optional + N : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - return dkl(self.posterior(D), self.prior(), n) + return dkl(self.posterior(D), self.prior(), N) - def bmd(self, D, n=0): + def bmd(self, D, N=0): """Bayesian model dimensionality. Analytically this is @@ -294,9 +294,9 @@ def bmd(self, D, n=0): n : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - return bmd(self.posterior(D), self.prior(), n) + return bmd(self.posterior(D), self.prior(), N) - def mutual_information(self, n=0): + def mutual_information(self, N=0): """Mutual information between the parameters and the data. Analytically this is @@ -305,25 +305,24 @@ def mutual_information(self, n=0): = log|1 + M Σ M'/ C|/2 """ - if n > 0: - n = int(n**0.5) - D = self.evidence().rvs(n) - θ = self.posterior(D).rvs(n) + if N > 0: + N = int(N**0.5) + D = self.evidence().rvs(N) + θ = self.posterior(D).rvs(N) return ( - (self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(θ)) + ( + self.posterior(D).logpdf(θ, broadcast=True) + - self.prior().logpdf(θ, broadcast=True) + ) .mean(axis=0) .mean(axis=0) ) MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C - return ( - logdet(C + np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M)) - / 2 - - logdet(C) / 2 - ) + return np.broadcast_to(logdet(C + MΣM) / 2 - logdet(C) / 2, self.shape) - def dimensionality(self): + def dimensionality(self, N=0): """Model dimensionality. 
Analytically this is @@ -332,16 +331,22 @@ def dimensionality(self): = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 """ - if n > 0: - n = int(n**0.5) - D = self.evidence().rvs(n) - θ = self.posterior(D).rvs(n) - logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(θ) + if N > 0: + N = int(N**0.5) + D = self.evidence().rvs(N) + θ = self.posterior(D).rvs(N) + logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf( + θ, broadcast=True + ) return logR.var(axis=0).mean(axis=0) * 2 MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C - return 2 * np.trace(MΣM @ inv(C + MΣM)) - np.trace( - MΣM @ inv(C + MΣM) @ MΣM @ inv(C + MΣM) + invCpMΣM = inv(C + MΣM) + + return np.broadcast_to( + 2 * np.einsum("...ij,...ji->...", MΣM, invCpMΣM) + - np.einsum("...ij,...jk,...kl,...li->...", MΣM, invCpMΣM, MΣM, invCpMΣM), + self.shape, ) @property @@ -516,53 +521,53 @@ def update(self, D, inplace=False): if not inplace: return dist - def dkl(self, D, n=0): + def dkl(self, D, N=0): """KL divergence between the posterior and prior. Parameters ---------- D : array_like, shape (..., d) Data to form the posterior - n : int, optional + N : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + if N == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") p = self.posterior(D) q = self.prior() - x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) - def bmd(self, D, n=0): + def bmd(self, D, N=0): """Bayesian model dimensionality. Parameters ---------- D : array_like, shape (..., d) Data to form the posterior - n : int, optional + N : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + if N == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") p = self.posterior(D) q = self.prior() - x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) - def mutual_information(self, n=0): + def mutual_information(self, N=0): """Mutual information between the parameters and the data.""" - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") - return super().mutual_information(n) + if N == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") + return super().mutual_information(N) def dimensionality(self): """Model dimensionality.""" - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") - return super().dimensionality(n) + if N == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. 
Use N>0.") + return super().dimensionality(N) class ReducedLinearModel(object): diff --git a/tests/test_model.py b/tests/test_model.py index 196923a..f71c889 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -271,21 +271,19 @@ def test_posterior( mutual_information = model.mutual_information() assert mutual_information.shape == model.shape + mutual_information.shape assert (mutual_information >= 0).all() - n = 1000 - mutual_information_mc = model.mutual_information(n) + mutual_information_mc = model.mutual_information(N) assert mutual_information_mc.shape == model.shape - assert (mutual_information_mc >= 0).all() - assert_allclose(mutual_information, mutual_information_mc, rtol=1) + assert_allclose(mutual_information, mutual_information_mc, atol=1) dimensionality = model.dimensionality() assert dimensionality.shape == model.shape assert (dimensionality >= 0).all() - dimensionality_mc = model.dimensionality(n) + dimensionality_mc = model.dimensionality(N) assert dimensionality_mc.shape == model.shape - assert (dimensionality_mc >= 0).all() assert_allclose(dimensionality, dimensionality_mc, rtol=1) def test_evidence( From 5bf5fb44339f40189d89d0b4d8bd0d5eac679f8e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 08:53:20 +0000 Subject: [PATCH 105/117] added monte carlo error estimates --- lsbi/model.py | 68 +++++++++++++++++++++++++++++++++------------ lsbi/stats.py | 37 ++++++++++++++++-------- tests/test_model.py | 8 +++--- tests/test_stats.py | 8 +++--- 4 files changed, 84 insertions(+), 37 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index be6c9e8..670a61b 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -296,7 +296,7 @@ def bmd(self, D, N=0): """ return bmd(self.posterior(D), self.prior(), N) - def mutual_information(self, N=0): + def mutual_information(self, N=0, mcerror=False): """Mutual information between the parameters and the data. Analytically this is @@ -304,25 +304,31 @@ def mutual_information(self, N=0): _P(D|θ) = log|1 + M Σ M'/ C|/2 + Parameters + ---------- + N : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + mcerror: bool, optional + Produce a monte carlo error estimate """ if N > 0: N = int(N**0.5) D = self.evidence().rvs(N) - θ = self.posterior(D).rvs(N) - return ( - ( - self.posterior(D).logpdf(θ, broadcast=True) - - self.prior().logpdf(θ, broadcast=True) - ) - .mean(axis=0) - .mean(axis=0) + θ = self.posterior(D).rvs() + logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf( + θ, broadcast=True ) + ans = logR.mean(axis=0) + if mcerror: + var = logR.var(axis=0) / (N - 1) + ans = (ans, var**0.5) + return ans MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C return np.broadcast_to(logdet(C + MΣM) / 2 - logdet(C) / 2, self.shape) - def dimensionality(self, N=0): + def dimensionality(self, N=0, mcerror=False): """Model dimensionality. 
Analytically this is @@ -330,6 +336,13 @@ def dimensionality(self, N=0): bmd/2 = _P(D) = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + + Parameters + ---------- + N : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + mcerror: bool, optional + Produce a monte carlo error estimate """ if N > 0: N = int(N**0.5) @@ -338,7 +351,12 @@ def dimensionality(self, N=0): logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf( θ, broadcast=True ) - return logR.var(axis=0).mean(axis=0) * 2 + ans = logR.var(axis=0).mean(axis=0) * 2 + if mcerror: + var = logR.var(axis=0).var(axis=0) / (2 * (N - 1) * (N - 1)) + ans = (ans, var**0.5) + return ans + MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C invCpMΣM = inv(C + MΣM) @@ -557,17 +575,33 @@ def bmd(self, D, N=0): x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) - def mutual_information(self, N=0): - """Mutual information between the parameters and the data.""" + def mutual_information(self, N=0, mcerror=False): + """Mutual information between the parameters and the data. + + Parameters + ---------- + N : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + mcerror: bool, optional + Produce a monte carlo error estimate + """ if N == 0: raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") - return super().mutual_information(N) + return super().mutual_information(N, mcerror) - def dimensionality(self): - """Model dimensionality.""" + def dimensionality(self, N=0, mcerror=False): + """Model dimensionality. + + Parameters + ---------- + N : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + mcerror: bool, optional + Produce a monte carlo error estimate + """ if N == 0: raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") - return super().dimensionality(N) + return super().dimensionality(N, mcerror) class ReducedLinearModel(object): diff --git a/lsbi/stats.py b/lsbi/stats.py index b584c6b..29a632e 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -585,7 +585,7 @@ def __getitem__(self, arg): # noqa: D105 return dist -def dkl(p, q, n=0): +def dkl(p, q, N=0, mcerror=False): """Kullback-Leibler divergence between two distributions. if P ~ N(p,P) and Q ~ N(q,Q) then @@ -597,8 +597,10 @@ def dkl(p, q, n=0): ---------- p : lsbi.stats.multivariate_normal q : lsbi.stats.multivariate_normal - n : int, optional, default=0 + N : int, optional, default=0 Number of samples to mcmc estimate the divergence. + mcerror: bool, optional, default=False + Produce a Monte Carlo error estimate Returns ------- @@ -606,9 +608,15 @@ def dkl(p, q, n=0): Kullback-Leibler divergence between p and q. """ shape = np.broadcast_shapes(p.shape, q.shape) - if n: - x = p.rvs(size=(n, *shape), broadcast=True) - return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) + if N: + x = p.rvs(size=(N, *shape), broadcast=True) + logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) + ans = logR.mean(axis=0) + if mcerror: + var = logR.var(axis=0) / N + ans = (ans, var**0.5) + return ans + dkl = -p.dim * np.ones(shape) dkl = dkl + logdet(q.cov * np.ones(q.dim), q.diagonal) dkl = dkl - logdet(p.cov * np.ones(p.dim), p.diagonal) @@ -630,7 +638,7 @@ def dkl(p, q, n=0): return dkl / 2 -def bmd(p, q, n=0): +def bmd(p, q, N=0, mcerror=False): """Bayesian model dimensionality between two distributions. 
if P ~ N(p,P) and Q ~ N(q,Q) then @@ -643,8 +651,10 @@ def bmd(p, q, n=0): ---------- p : lsbi.stats.multivariate_normal q : lsbi.stats.multivariate_normal - n : int, optional, default=0 + N : int, optional, default=0 Number of samples to mcmc estimate the divergence. + mcerror: bool, optional, default=False + Produce a Monte Carlo error estimate Returns ------- @@ -652,11 +662,14 @@ def bmd(p, q, n=0): Bayesian model dimensionality between p and q. """ shape = np.broadcast_shapes(p.shape, q.shape) - if n: - x = p.rvs(size=(n, *shape), broadcast=True) - return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var( - axis=0 - ) * 2 + if N: + x = p.rvs(size=(N, *shape), broadcast=True) + logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) + ans = logR.var(axis=0) * 2 + if mcerror: + var = logR.var(axis=0) / (2 * (N - 1)) * 4 + ans = (ans, var**0.5) + return ans bmd = p.dim / 2 * np.ones(shape) pq = (p.mean - q.mean) * np.ones(p.dim) diff --git a/tests/test_model.py b/tests/test_model.py index f71c889..c9b68a1 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -274,17 +274,17 @@ def test_posterior( mutual_information.shape assert (mutual_information >= 0).all() - mutual_information_mc = model.mutual_information(N) + mutual_information_mc, err = model.mutual_information(N, True) assert mutual_information_mc.shape == model.shape - assert_allclose(mutual_information, mutual_information_mc, atol=1) + assert_allclose(mutual_information, mutual_information_mc, atol=(5 * err).max()) dimensionality = model.dimensionality() assert dimensionality.shape == model.shape assert (dimensionality >= 0).all() - dimensionality_mc = model.dimensionality(N) + dimensionality_mc, err = model.dimensionality(N, True) assert dimensionality_mc.shape == model.shape - assert_allclose(dimensionality, dimensionality_mc, rtol=1) + assert_allclose(dimensionality, dimensionality_mc, atol=(5 * err).max()) def test_evidence( self, diff --git a/tests/test_stats.py b/tests/test_stats.py index 37286b3..a4a74d7 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -578,10 +578,10 @@ def test_dkl( assert (dkl_pq >= 0).all() assert dkl_pq.shape == np.broadcast_shapes(p.shape, q.shape) - dkl_mc = dkl(p, q, 1000) + dkl_mc, err = dkl(p, q, 1000, True) assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(dkl_pq, dkl_mc, atol=1) + assert_allclose(dkl_pq, dkl_mc, atol=(5 * err).max()) @pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) @@ -613,7 +613,7 @@ def test_bmd( assert (bmd_pq >= 0).all() assert bmd_pq.shape == np.broadcast_shapes(p.shape, q.shape) - bmd_mc = bmd(p, q, 1000) + bmd_mc, err = bmd(p, q, 1000, True) assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(bmd_pq, bmd_mc, rtol=1) + assert_allclose(bmd_pq, bmd_mc, atol=(5 * err).max()) From ca4d0d298caada591a7fd8019e33db007d1c70d7 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 10:05:41 +0000 Subject: [PATCH 106/117] Trying to debug underestimate of dimensionality error --- lsbi/model.py | 9 +++++---- lsbi/stats.py | 8 ++++---- tests/test_model.py | 4 ++-- tests/test_stats.py | 4 ++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 670a61b..e940016 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -320,8 +320,8 @@ def mutual_information(self, N=0, mcerror=False): ) ans = logR.mean(axis=0) if mcerror: - var = logR.var(axis=0) / (N - 1) - ans = (ans, var**0.5) + err = 
(logR.var(axis=0) / (N - 1)) ** 0.5 + ans = (ans, err) return ans MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) @@ -353,8 +353,9 @@ def dimensionality(self, N=0, mcerror=False): ) ans = logR.var(axis=0).mean(axis=0) * 2 if mcerror: - var = logR.var(axis=0).var(axis=0) / (2 * (N - 1) * (N - 1)) - ans = (ans, var**0.5) + err = logR.var(axis=0) * (2 / (N - 1)) ** 0.5 * 2 + err = ((err**2).sum(axis=0) / (N - 1)) ** 0.5 + ans = (ans, err) return ans MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) diff --git a/lsbi/stats.py b/lsbi/stats.py index 29a632e..2274420 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -613,8 +613,8 @@ def dkl(p, q, N=0, mcerror=False): logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) ans = logR.mean(axis=0) if mcerror: - var = logR.var(axis=0) / N - ans = (ans, var**0.5) + err = (logR.var(axis=0) / (N - 1)) ** 0.5 + ans = (ans, err) return ans dkl = -p.dim * np.ones(shape) @@ -667,8 +667,8 @@ def bmd(p, q, N=0, mcerror=False): logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) ans = logR.var(axis=0) * 2 if mcerror: - var = logR.var(axis=0) / (2 * (N - 1)) * 4 - ans = (ans, var**0.5) + err = logR.var(axis=0) * (2 / (N - 1)) ** 0.5 * 2 + ans = (ans, err) return ans bmd = p.dim / 2 * np.ones(shape) diff --git a/tests/test_model.py b/tests/test_model.py index c9b68a1..e291952 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -276,7 +276,7 @@ def test_posterior( mutual_information_mc, err = model.mutual_information(N, True) assert mutual_information_mc.shape == model.shape - assert_allclose(mutual_information, mutual_information_mc, atol=(5 * err).max()) + assert_allclose((mutual_information - mutual_information_mc) / err, 0, atol=5) dimensionality = model.dimensionality() assert dimensionality.shape == model.shape @@ -284,7 +284,7 @@ def test_posterior( dimensionality_mc, err = model.dimensionality(N, True) assert dimensionality_mc.shape == model.shape - assert_allclose(dimensionality, dimensionality_mc, atol=(5 * err).max()) + assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=5) def test_evidence( self, diff --git a/tests/test_stats.py b/tests/test_stats.py index a4a74d7..15cb784 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -581,7 +581,7 @@ def test_dkl( dkl_mc, err = dkl(p, q, 1000, True) assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(dkl_pq, dkl_mc, atol=(5 * err).max()) + assert_allclose((dkl_pq - dkl_mc) / err, 0, atol=5) @pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) @@ -616,4 +616,4 @@ def test_bmd( bmd_mc, err = bmd(p, q, 1000, True) assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(bmd_pq, bmd_mc, atol=(5 * err).max()) + assert_allclose((bmd_pq - bmd_mc) / err, 0, atol=5) From 5a82db73efccecc21feaa23626cda4f6fd4af78d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 18:06:37 +0000 Subject: [PATCH 107/117] Made some notes about bmd error estimation. To naut. 
Let's just give it generous error bars --- lsbi/stats.py | 16 +++++++++++++++- tests/test_model.py | 1 + tests/test_stats.py | 5 +++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 2274420..16980e0 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -647,6 +647,17 @@ def bmd(p, q, N=0, mcerror=False): = 1/2 tr(Q^{-1} P Q^{-1} P) - 1/2 (tr(Q^{-1} P))^2 + (q - p)' Q^{-1} P Q^{-1} (q - p) + d/2 + From: + https://stats.stackexchange.com/q/333838 + we can estimate the error in the sample variance S as: + + S^2/sigma^2 = chi_df^2 / df + df = 2n/(kappa-(n-3)/(n-1)) + + kappa is the kurtosis, so if a normal distribution is assumed, kappa = 3 and df = n-1 + Here we take kappa to be the kurtosis of the logR, which should in + principle be 3 + 12/d for a chi squared distribution. since logR is distributed as chi squared with bmd degrees of freedom, we take kappa = 3 + 12/(bmd) + Parameters ---------- p : lsbi.stats.multivariate_normal @@ -667,7 +678,10 @@ def bmd(p, q, N=0, mcerror=False): logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) ans = logR.var(axis=0) * 2 if mcerror: - err = logR.var(axis=0) * (2 / (N - 1)) ** 0.5 * 2 + # kappa = 3 + 12 / ans + # df = 2 * N / (kappa - (N - 3) / (N - 1)) + # err = ans * (2 / df) ** 0.5 + err = ans * (2 / N - 1) ** 0.5 ans = (ans, err) return ans diff --git a/tests/test_model.py b/tests/test_model.py index e291952..4bb5a69 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -285,6 +285,7 @@ def test_posterior( dimensionality_mc, err = model.dimensionality(N, True) assert dimensionality_mc.shape == model.shape assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=5) + plt.hist(logR[:, 0, 0], bins=100) def test_evidence( self, diff --git a/tests/test_stats.py b/tests/test_stats.py index 15cb784..874884e 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -563,6 +563,7 @@ def test_dkl( cov_shape_q, diagonal_q, ): + dim = dim_p p = TestMultivariateNormal().random( dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p ) @@ -598,6 +599,7 @@ def test_bmd( cov_shape_q, diagonal_q, ): + dim = dim_p p = TestMultivariateNormal().random( dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p ) @@ -615,5 +617,4 @@ def test_bmd( bmd_mc, err = bmd(p, q, 1000, True) assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - - assert_allclose((bmd_pq - bmd_mc) / err, 0, atol=5) + assert_allclose((bmd_pq - bmd_mc) / err, 0, atol=10) From c091a36d3e1ba79ac45c0fc335578dc2dc28ddba Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 18:07:10 +0000 Subject: [PATCH 108/117] Cleaned up stats.py --- lsbi/stats.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 16980e0..f8bd7c8 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -647,17 +647,6 @@ def bmd(p, q, N=0, mcerror=False): = 1/2 tr(Q^{-1} P Q^{-1} P) - 1/2 (tr(Q^{-1} P))^2 + (q - p)' Q^{-1} P Q^{-1} (q - p) + d/2 - From: - https://stats.stackexchange.com/q/333838 - we can estimate the error in the sample variance S as: - - S^2/sigma^2 = chi_df^2 / df - df = 2n/(kappa-(n-3)/(n-1)) - - kappa is the kurtosis, so if a normal distribution is assumed, kappa = 3 and df = n-1 - Here we take kappa to be the kurtosis of the logR, which should in - principle be 3 + 12/d for a chi squared distribution. 
since logR is distributed as chi squared with bmd degrees of freedom, we take kappa = 3 + 12/(bmd) - Parameters ---------- p : lsbi.stats.multivariate_normal @@ -678,9 +667,6 @@ def bmd(p, q, N=0, mcerror=False): logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) ans = logR.var(axis=0) * 2 if mcerror: - # kappa = 3 + 12 / ans - # df = 2 * N / (kappa - (N - 3) / (N - 1)) - # err = ans * (2 / df) ** 0.5 err = ans * (2 / N - 1) ** 0.5 ans = (ans, err) return ans From d0e083bd6060ce7f81d69a55b908350ce829020f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 19:29:59 +0000 Subject: [PATCH 109/117] Tests now up to date --- tests/test_model.py | 5 ++--- tests/test_stats.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_model.py b/tests/test_model.py index 4bb5a69..1615933 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -276,7 +276,7 @@ def test_posterior( mutual_information_mc, err = model.mutual_information(N, True) assert mutual_information_mc.shape == model.shape - assert_allclose((mutual_information - mutual_information_mc) / err, 0, atol=5) + assert_allclose((mutual_information - mutual_information_mc) / err, 0, atol=10) dimensionality = model.dimensionality() assert dimensionality.shape == model.shape @@ -284,8 +284,7 @@ def test_posterior( dimensionality_mc, err = model.dimensionality(N, True) assert dimensionality_mc.shape == model.shape - assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=5) - plt.hist(logR[:, 0, 0], bins=100) + assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=10) def test_evidence( self, diff --git a/tests/test_stats.py b/tests/test_stats.py index 874884e..d054fab 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -582,7 +582,7 @@ def test_dkl( dkl_mc, err = dkl(p, q, 1000, True) assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose((dkl_pq - dkl_mc) / err, 0, atol=5) + assert_allclose((dkl_pq - dkl_mc) / err, 0, atol=10) @pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) From 362b06a7a8fe8ab28f45069e9d01131ff8aa053f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 19:31:42 +0000 Subject: [PATCH 110/117] bump version to 0.13.0 --- README.rst | 2 +- lsbi/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 8be0d84..7b4e4de 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference ======================================= :lsbi: Linear Simulation Based Inference :Author: Will Handley & David Yallup -:Version: 0.12.0 +:Version: 0.13.0 :Homepage: https://github.com/handley-lab/lsbi :Documentation: http://lsbi.readthedocs.io/ diff --git a/lsbi/_version.py b/lsbi/_version.py index ea370a8..f23a6b3 100644 --- a/lsbi/_version.py +++ b/lsbi/_version.py @@ -1 +1 @@ -__version__ = "0.12.0" +__version__ = "0.13.0" From ba3abce0a79f1f69396fca75515d8a85e0c0c224 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 22:31:55 +0000 Subject: [PATCH 111/117] Fixed documentation --- lsbi/model.py | 16 ++++++---------- lsbi/stats.py | 6 +++--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index e940016..a8b82ee 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -266,9 +266,8 @@ def dkl(self, D, N=0): Analytically this is - _P(θ|D) - - 1/2 (log|1 + M Σ M'/ C| - tr M Σ M'/ (C+M Σ M') + (μ - μ_P)' Σ^-1 (μ - μ_P)) + _P(θ|D) = 1/2 (log|1 + M Σ 
M'/ C| + - tr M Σ M'/ (C+M Σ M') + (μ - μ_P)' Σ^-1 (μ - μ_P)) Parameters ---------- @@ -283,9 +282,9 @@ def bmd(self, D, N=0): """Bayesian model dimensionality. Analytically this is - bmd/2 = var(log P(θ|D)/P(θ))_P(θ|D) - = 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + (μ - μ_P)' Σ^-1 Σ_P Σ^-1(μ - μ_P) + var(log P(θ|D)/P(θ))_P(θ|D) = 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + + (μ - μ_P)' Σ^-1 Σ_P Σ^-1(μ - μ_P) Parameters ---------- @@ -301,9 +300,8 @@ def mutual_information(self, N=0, mcerror=False): Analytically this is - _P(D|θ) + _P(D,θ) = log|1 + M Σ M'/ C|/2 - = log|1 + M Σ M'/ C|/2 Parameters ---------- N : int, optional @@ -333,9 +331,7 @@ def dimensionality(self, N=0, mcerror=False): Analytically this is - bmd/2 = _P(D) - - = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + _P(D) = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 Parameters ---------- diff --git a/lsbi/stats.py b/lsbi/stats.py index f8bd7c8..e6fc8a2 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -586,12 +586,12 @@ def __getitem__(self, arg): # noqa: D105 def dkl(p, q, N=0, mcerror=False): - """Kullback-Leibler divergence between two distributions. + r"""Kullback-Leibler divergence between two distributions. if P ~ N(p,P) and Q ~ N(q,Q) then - D_KL(P||Q) = _P - = 1/2 * (log(|Q|/|P|) - d + tr(Q^{-1} P) + (q - p)' Q^{-1} (q - p)) + D_KL(P\||Q) = _P = + 1/2 * (log(\|Q|/\|P|) - d + tr(Q^{-1} P) + (q - p)' Q^{-1} (q - p)) Parameters ---------- From 6974b7a0640d972c2f772d7b2fe3510dab557a2b Mon Sep 17 00:00:00 2001 From: Will Handley Date: Mon, 13 May 2024 11:24:34 +0100 Subject: [PATCH 112/117] Corrected cholesky --- lsbi/stats.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 9258120..e6d82e4 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -100,8 +100,9 @@ def logpdf(self, x, broadcast=False): chi2 = (dx**2 / self.cov).sum(axis=-1) norm = -np.log(2 * np.pi * np.ones(self.dim) * self.cov).sum(axis=-1) / 2 else: - chi2 = np.einsum("...j,...jk,...k->...", dx, inv(self.cov), dx) - norm = -logdet(2 * np.pi * self.cov) / 2 + cov = np.atleast_2d(self.cov) + chi2 = np.einsum("...j,...jk,...k->...", dx, inv(cov), dx) + norm = -logdet(2 * np.pi * cov) / 2 return norm - chi2 / 2 def pdf(self, x, broadcast=False): @@ -146,7 +147,8 @@ def rvs(self, size=(), broadcast=False): if self.diagonal: return self.mean + np.sqrt(self.cov) * x else: - return self.mean + np.einsum("...jk,...k->...j", cholesky(self.cov), x) + L = cholesky(np.atleast_2d(self.cov)) + return self.mean + np.einsum("...jk,...k->...j", L, x) def predict(self, A=1, b=0, diagonal=False): """Predict the mean and covariance of a linear transformation. 
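For context on this fix: numpy's cholesky and inv require an array that is at least two-dimensional, so a scalar covariance (which the broadcasting conventions here otherwise allow) has to be promoted before factorisation. A minimal standalone check, not part of the diff:

    import numpy as np

    cov = 4.0                                    # scalar covariance
    # np.linalg.cholesky(cov)                    # LinAlgError: array must be at least 2-d
    L = np.linalg.cholesky(np.atleast_2d(cov))   # array([[2.]])
    x = np.random.randn(3, 1)
    samples = 0.0 + np.einsum("...jk,...k->...j", L, x)  # same contraction as in rvs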
@@ -296,14 +298,15 @@ def bijector(self, x, inverse=False): if self.diagonal: y = (x - mean) / np.sqrt(self.cov) else: - y = np.einsum("...jk,...k->...j", inv(cholesky(self.cov)), x - mean) + Linv = inv(cholesky(np.atleast_2d(self.cov))) + y = np.einsum("...jk,...k->...j", Linv, x - mean) return scipy.stats.norm.cdf(y) else: y = scipy.stats.norm.ppf(x) if self.diagonal: return mean + np.sqrt(self.cov) * y else: - L = cholesky(self.cov) + L = cholesky(np.atleast_2d(self.cov)) return mean + np.einsum("...jk,...k->...j", L, y) def __getitem__(self, arg): @@ -516,7 +519,7 @@ def rvs(self, size=(), broadcast=False): L = np.take_along_axis(np.moveaxis(L, -2, 0), i[..., None], axis=0) rvs = mean + L * x else: - L = cholesky(self.cov) + L = cholesky(np.atleast_2d(self.cov)) L = np.broadcast_to(L, (*self.shape, self.dim, self.dim)) L = np.take_along_axis(np.moveaxis(L, -3, 0), i[..., None, None], axis=0) rvs = mean + np.einsum("...ij,...j->...i", L, x) From b0e2205a345a31a2e7f47998e4297bf03279de29 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 14 May 2024 16:16:40 +0100 Subject: [PATCH 113/117] Updated plotting for mixtures --- lsbi/plot.py | 2 +- lsbi/stats.py | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/lsbi/plot.py b/lsbi/plot.py index 22cd505..6e5b35c 100644 --- a/lsbi/plot.py +++ b/lsbi/plot.py @@ -338,7 +338,7 @@ def plot_2d(dist, axes=None, *args, **kwargs): for y, row in axes.iterrows(): for x, ax in row.items(): if ax.position == "diagonal": - pdf_plot_1d(ax.twin, dist[x], *args, **kwargs) + pdf_plot_1d(ax.twin, dist[[x]], *args, **kwargs) elif ax.position == "lower": pdf_plot_2d(ax, dist[[x, y]], *args, **kwargs) elif ax.position == "upper": diff --git a/lsbi/stats.py b/lsbi/stats.py index e6d82e4..6655eb3 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -613,9 +613,25 @@ def f(t): return theta def __getitem__(self, arg): # noqa: D105 - dist = super().__getitem__(arg) - dist.__class__ = mixture_normal - dist.logw = np.broadcast_to(self.logw, self.shape)[arg] + dist = deepcopy(self) + if self.shape[:-1] == (): + dist.mean = (np.ones((*self.shape, self.dim)) * self.mean)[..., arg] + if self.diagonal: + dist.cov = (np.ones((*self.shape, self.dim)) * self.cov)[..., arg] + else: + dist.cov = self.cov[..., arg, :][..., arg] + dist._dim = np.shape(dist.mean)[-1] + else: + dist.mean = np.broadcast_to(self.mean, (*self.shape, self.dim))[arg] + if dist.diagonal: + dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim))[arg] + else: + dist.cov = np.broadcast_to(self.cov, (*self.shape, self.dim, self.dim))[ + arg + ] + dist.logw = np.broadcast_to(self.logw, self.shape)[arg] + dist._shape = dist.mean.shape[:-1] + dist._dim = dist.mean.shape[-1] return dist def plot_1d(self, axes=None, *args, **kwargs): # noqa: D102 From 1c94fc114ebc9504a792fc968d17f431036b262b Mon Sep 17 00:00:00 2001 From: yallup Date: Thu, 4 Jul 2024 13:34:19 +0100 Subject: [PATCH 114/117] remove torch dep --- README.rst | 7 +- docs/source/lsbi.rst | 9 +- lsbi/_version.py | 2 +- lsbi/network.py | 205 ----------------------------------------- pyproject.toml | 1 - tests/test_networks.py | 81 ---------------- 6 files changed, 5 insertions(+), 300 deletions(-) delete mode 100644 lsbi/network.py delete mode 100644 tests/test_networks.py diff --git a/README.rst b/README.rst index 7b4e4de..ef8df8c 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference ======================================= :lsbi: Linear Simulation Based Inference 
:Author: Will Handley & David Yallup -:Version: 0.13.0 +:Version: 0.13.1 :Homepage: https://github.com/handley-lab/lsbi :Documentation: http://lsbi.readthedocs.io/ @@ -69,8 +69,8 @@ You can check that things are working by running the test suite: .. code:: bash python -m pytest - black . - isort --profile black . + black lsbi + isort --profile black lsbi pydocstyle --convention=numpy lsbi @@ -134,7 +134,6 @@ There are many ways you can contribute via the `GitHub repository `__ to report bugs or to propose new features. - Pull requests are very welcome. Note that if you are going to propose major changes, be sure to open an issue for discussion first, to make sure that your PR will be accepted before you spend effort coding it. -- Adding models and data to the grid. Contact `Will Handley `__ to request models or ask for your own to be uploaded. Questions/Comments diff --git a/docs/source/lsbi.rst b/docs/source/lsbi.rst index 287a8e5..f404b0f 100644 --- a/docs/source/lsbi.rst +++ b/docs/source/lsbi.rst @@ -16,20 +16,13 @@ lsbi.model module :show-inheritance: -lsbi.network module -------------------- - -.. automodule:: lsbi.network - :members: - :undoc-members: - - lsbi.stats module ----------------- .. automodule:: lsbi.stats :members: :undoc-members: + :show-inheritance: lsbi.utils module diff --git a/lsbi/_version.py b/lsbi/_version.py index f23a6b3..7e0dc0e 100644 --- a/lsbi/_version.py +++ b/lsbi/_version.py @@ -1 +1 @@ -__version__ = "0.13.0" +__version__ = "0.13.1" diff --git a/lsbi/network.py b/lsbi/network.py deleted file mode 100644 index ad3c0ed..0000000 --- a/lsbi/network.py +++ /dev/null @@ -1,205 +0,0 @@ -"""Simple binary classifiers to perform model comparison.""" - -import torch -import torch.nn as nn -import torch.optim as optim -from torch.optim.lr_scheduler import ExponentialLR - - -class BinaryClassifierBase(nn.Module): - """Base model for binary classification. Following 2305.11241. - - A simple binary classifier: - - 5 hidden layers: - - Layer 1 with initial_dim units - - Layers 2-4 with internal_dim units - - Leaky ReLU activation function - - Batch normalization - - Output layer with 1 unit linear classifier unit - - Adam optimizer with default learning rate 0.001 - - Exponential learning rate decay with default decay rate 0.95 - - Parameters - ---------- - input_dim : int - Dimension of the input data. - internal_dim : int, optional (default=16) - Dimension of the internal layers of the network. - initial_dim : int, optional (default=130) - Dimension of the first layer of the network. - """ - - def __init__(self, input_dim, internal_dim=16, initial_dim=130): - super(BinaryClassifierBase, self).__init__() - - self.model = nn.Sequential( - nn.Linear(input_dim, initial_dim), - nn.LeakyReLU(), - nn.BatchNorm1d(initial_dim), - nn.Linear(initial_dim, internal_dim), - nn.LeakyReLU(), - nn.BatchNorm1d(internal_dim), - nn.Linear(internal_dim, internal_dim), - nn.LeakyReLU(), - nn.BatchNorm1d(internal_dim), - nn.Linear(internal_dim, internal_dim), - nn.LeakyReLU(), - nn.BatchNorm1d(internal_dim), - nn.Linear(internal_dim, internal_dim), - nn.LeakyReLU(), - nn.Linear(internal_dim, 1), - ) - - def forward(self, x): - """Forward pass through the network, logit output.""" - return self.model(x) - - def loss(self, x, y): - """Loss function for the network.""" - raise NotImplementedError - - def predict(self, x): - """Predict the Bayes Factor.""" - raise NotImplementedError - - def fit(self, X, y, **kwargs): - """Fit classifier on input features X to predict labels y. 
- - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - Input data. - y : array-like, shape (n_samples,) - Target values. - num_epochs : int, optional (default=10) - Number of epochs to train the network. - batch_size : int, optional (default=128) - Batch size for training. - decay_rate : float, optional (default=0.95) - Decay rate for the learning rate scheduler. - lr : float, optional (default=0.001) - Learning rate for the optimizer. - device : str, optional (default="cpu") - Device to use for training. - """ - num_epochs = kwargs.get("num_epochs", 10) - batch_size = kwargs.get("batch_size", 128) - decay_rate = kwargs.get("decay_rate", 0.95) - lr = kwargs.get("lr", 0.001) - device = torch.device(kwargs.get("device", "cpu")) - - print("Using device: ", device) - - # Convert labels to torch tensor - X = torch.tensor(X, dtype=torch.float32) - labels = torch.tensor(y, dtype=torch.float32) - labels = labels.unsqueeze(1) - labels = labels.to(device) - - # Create a DataLoader for batch training - dataset = torch.utils.data.TensorDataset(X, labels) - dataloader = torch.utils.data.DataLoader( - dataset, batch_size=batch_size, shuffle=True - ) - - # Define the loss function and optimizer - criterion = self.loss - optimizer = optim.Adam(self.parameters(), lr=lr) - - # Create the scheduler and pass in the optimizer and decay rate - scheduler = ExponentialLR(optimizer, gamma=decay_rate) - - # Create a DataLoader for batch training - self.to(device=device, dtype=torch.float32) - - for epoch in range(num_epochs): - epoch_loss = [] - for i, (inputs, targets) in enumerate(dataloader): - # Clear gradients - optimizer.zero_grad() - inputs = inputs.to(device) - # Forward pass - loss = criterion(inputs, targets) - epoch_loss.append(loss.item()) - # Backward pass and optimize - loss.backward() - optimizer.step() - - # Print loss for every epoch - scheduler.step() - mean_loss = torch.mean(torch.tensor(epoch_loss)).item() - print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}") - - # once training is done, set the model to eval(), ensures batchnorm - # and dropout are not used during inference - self.model.eval() - - -class BinaryClassifier(BinaryClassifierBase): - """ - Extends the BinaryClassifierBase to use a BCE loss function. - - Furnishes with a direction prediction of the Bayes Factor. - """ - - def loss(self, x, y): - """Binary cross entropy loss function for the network.""" - y_ = self.forward(x) - return nn.BCEWithLogitsLoss()(y_, y) - - def predict(self, x): - """Predict the log Bayes Factor. - - log K = lnP(Class 1) - lnP(Class 0) - """ - # ensure model is in eval just in case - self.model.eval() - - x = torch.tensor(x, dtype=torch.float32) - x = torch.atleast_2d(x) - pred = self.forward(x) - pred = nn.Sigmoid()(pred) - return (torch.log(pred) - torch.log(1 - pred)).detach().numpy() - - -class BinaryClassifierLPop(BinaryClassifierBase): - """ - Extends the BinaryClassifierBase to use a LPop Exponential loss. - - Furnishes with a direction prediction of the Bayes Factor. - - Parameters - ---------- - alpha : float, optional (default=2.0) - Scale factor for the exponent transform. 
- """ - - def __init__(self, *args, **kwargs): - self.alpha = kwargs.pop("alpha", 2.0) - super(BinaryClassifierLPop, self).__init__(*args, **kwargs) - - def lpop(self, x): - """Leaky parity odd power transform.""" - return x + x * torch.pow(torch.abs(x), self.alpha - 1.0) - - def loss(self, x, y): - """Lpop Loss function for the network.""" - x = self.forward(x) - return torch.exp( - torch.logsumexp((0.5 - y) * self.lpop(x), dim=0) - - torch.log(torch.tensor(x.shape[0], dtype=torch.float64)) - ).squeeze() - - def predict(self, x): - """Predict the log Bayes Factor. - - log K = lnP(Class 1) - lnP(Class 0) - """ - # ensure model is in eval just in case - self.model.eval() - - x = torch.tensor(x, dtype=torch.float32) - x = torch.atleast_2d(x) - pred = self.forward(x) - pred = self.lpop(pred) - return pred.detach().numpy() diff --git a/pyproject.toml b/pyproject.toml index f998f19..c231ccd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,6 @@ dependencies = [ 'numpy', 'scipy', 'matplotlib', - 'torch', ] classifiers = [ "Programming Language :: Python :: 3", diff --git a/tests/test_networks.py b/tests/test_networks.py deleted file mode 100644 index afe1caf..0000000 --- a/tests/test_networks.py +++ /dev/null @@ -1,81 +0,0 @@ -import numpy as np -import pytest -import torch - -from lsbi.network import BinaryClassifier, BinaryClassifierBase, BinaryClassifierLPop - - -@pytest.mark.parametrize("input_dim", [1, 100]) -@pytest.mark.parametrize("internal_dim", [16, 32]) -@pytest.mark.parametrize("initial_dim", [130, 256]) -class TestClassifierBase: - CLS = BinaryClassifierBase - - @pytest.fixture - def model(self, input_dim, internal_dim, initial_dim): - return self.CLS(input_dim, internal_dim, initial_dim) - - @pytest.fixture - def x(self, input_dim): - return torch.tensor(np.random.rand(10, input_dim), dtype=torch.float32) - - @pytest.fixture - def y(self): - return torch.tensor(np.random.randint(0, 2, size=(10, 1)), dtype=torch.float32) - - def fit_model(self, model, input_dim): - data_size = 10 - data = np.random.rand(data_size, input_dim) - labels = np.random.randint(0, 2, size=(data_size)) - y_start = model.predict(data) - model.fit(data, labels, num_epochs=1) - y_end = model.predict(data) - return y_start, y_end - - def test_init(self, model): - assert isinstance(model, BinaryClassifierBase) - - def test_forward(self, model, x): - y = model.forward(x) - assert y.shape == (10, 1) - - def test_loss(self, model, x, y): - with pytest.raises(NotImplementedError): - model.loss(x, y) - - def test_predict(self, model, x): - with pytest.raises(NotImplementedError): - model.predict(x) - - def test_fit(self, model, x): - with pytest.raises(NotImplementedError): - y_start, y_end = self.fit_model(model, x.shape[1]) - - -class TestClassifier(TestClassifierBase): - CLS = BinaryClassifier - - def test_loss(self, model, x, y): - loss = model.loss(x, y) - assert loss.detach().numpy().shape == () - - @pytest.mark.filterwarnings("ignore::UserWarning") - def test_fit(self, model, x): - y_start, y_end = self.fit_model(model, x.shape[1]) - assert (y_start != y_end).any() - - @pytest.mark.parametrize("size", [-1, 1]) - @pytest.mark.filterwarnings("ignore::UserWarning") - def test_predict(self, model, x, size): - y = model.predict(x[:size].squeeze(0)) - assert y.shape == (len(x[:size]), 1) - assert isinstance(y, np.ndarray) - - -@pytest.mark.parametrize("alpha", [2, 5]) -class TestClassifierLPop(TestClassifier): - CLS = BinaryClassifierLPop - - @pytest.fixture - def model(self, input_dim, internal_dim, 
initial_dim, alpha): - return self.CLS(input_dim, internal_dim, initial_dim, alpha=alpha) From b28d8b3f9f61528341d822bb6e84e1453cd5a23d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 25 Jul 2024 12:38:15 +0100 Subject: [PATCH 115/117] Removed BMD changes --- README.rst | 4 +- lsbi/model.py | 153 +------- lsbi/model_1.py | 825 ++++++++++++++++++++++++++++++++++++++++++++ lsbi/stats.py | 95 +---- tests/test_model.py | 39 --- tests/test_stats.py | 42 +-- 6 files changed, 844 insertions(+), 314 deletions(-) create mode 100644 lsbi/model_1.py diff --git a/README.rst b/README.rst index c863392..89e21c3 100644 --- a/README.rst +++ b/README.rst @@ -69,8 +69,8 @@ You can check that things are working by running the test suite: .. code:: bash python -m pytest - black lsbi - isort --profile black lsbi + black . + isort --profile black . pydocstyle --convention=numpy lsbi diff --git a/lsbi/model.py b/lsbi/model.py index a8b82ee..b5fc90e 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -6,7 +6,7 @@ from numpy.linalg import inv, solve from scipy.special import logsumexp -from lsbi.stats import bmd, dkl, mixture_normal, multivariate_normal +from lsbi.stats import dkl, mixture_normal, multivariate_normal from lsbi.utils import alias, dediagonalise, logdet @@ -261,31 +261,9 @@ def ppd(self, D0): """P(D|D0) as a distribution object.""" return self.update(D0).evidence() - def dkl(self, D, N=0): + def dkl(self, D, n=0): """KL divergence between the posterior and prior. - Analytically this is - - _P(θ|D) = 1/2 (log|1 + M Σ M'/ C| - - tr M Σ M'/ (C+M Σ M') + (μ - μ_P)' Σ^-1 (μ - μ_P)) - - Parameters - ---------- - D : array_like, shape (..., d) - Data to form the posterior - N : int, optional - Number of samples for a monte carlo estimate, defaults to 0 - """ - return dkl(self.posterior(D), self.prior(), N) - - def bmd(self, D, N=0): - """Bayesian model dimensionality. - - Analytically this is - - var(log P(θ|D)/P(θ))_P(θ|D) = 1/2 tr(M Σ M'/ (C+M Σ M'))^2 - + (μ - μ_P)' Σ^-1 Σ_P Σ^-1(μ - μ_P) - Parameters ---------- D : array_like, shape (..., d) @@ -293,76 +271,7 @@ def bmd(self, D, N=0): n : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - return bmd(self.posterior(D), self.prior(), N) - - def mutual_information(self, N=0, mcerror=False): - """Mutual information between the parameters and the data. - - Analytically this is - - _P(D,θ) = log|1 + M Σ M'/ C|/2 - - Parameters - ---------- - N : int, optional - Number of samples for a monte carlo estimate, defaults to 0 - mcerror: bool, optional - Produce a monte carlo error estimate - """ - if N > 0: - N = int(N**0.5) - D = self.evidence().rvs(N) - θ = self.posterior(D).rvs() - logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf( - θ, broadcast=True - ) - ans = logR.mean(axis=0) - if mcerror: - err = (logR.var(axis=0) / (N - 1)) ** 0.5 - ans = (ans, err) - return ans - - MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) - C = self._C - return np.broadcast_to(logdet(C + MΣM) / 2 - logdet(C) / 2, self.shape) - - def dimensionality(self, N=0, mcerror=False): - """Model dimensionality. 
- - Analytically this is - - _P(D) = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 - - Parameters - ---------- - N : int, optional - Number of samples for a monte carlo estimate, defaults to 0 - mcerror: bool, optional - Produce a monte carlo error estimate - """ - if N > 0: - N = int(N**0.5) - D = self.evidence().rvs(N) - θ = self.posterior(D).rvs(N) - logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf( - θ, broadcast=True - ) - ans = logR.var(axis=0).mean(axis=0) * 2 - if mcerror: - err = logR.var(axis=0) * (2 / (N - 1)) ** 0.5 * 2 - err = ((err**2).sum(axis=0) / (N - 1)) ** 0.5 - ans = (ans, err) - return ans - - MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) - C = self._C - invCpMΣM = inv(C + MΣM) - - return np.broadcast_to( - 2 * np.einsum("...ij,...ji->...", MΣM, invCpMΣM) - - np.einsum("...ij,...jk,...kl,...li->...", MΣM, invCpMΣM, MΣM, invCpMΣM), - self.shape, - ) + return dkl(self.posterior(D), self.prior(), n) @property def _M(self): @@ -536,70 +445,24 @@ def update(self, D, inplace=False): if not inplace: return dist - def dkl(self, D, N=0): + def dkl(self, D, n=0): """KL divergence between the posterior and prior. Parameters ---------- D : array_like, shape (..., d) Data to form the posterior - N : int, optional + n : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - if N == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") p = self.posterior(D) q = self.prior() - x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True) + x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) - def bmd(self, D, N=0): - """Bayesian model dimensionality. - - Parameters - ---------- - D : array_like, shape (..., d) - Data to form the posterior - N : int, optional - Number of samples for a monte carlo estimate, defaults to 0 - """ - if N == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") - - p = self.posterior(D) - q = self.prior() - x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True) - return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) - - def mutual_information(self, N=0, mcerror=False): - """Mutual information between the parameters and the data. - - Parameters - ---------- - N : int, optional - Number of samples for a monte carlo estimate, defaults to 0 - mcerror: bool, optional - Produce a monte carlo error estimate - """ - if N == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") - return super().mutual_information(N, mcerror) - - def dimensionality(self, N=0, mcerror=False): - """Model dimensionality. - - Parameters - ---------- - N : int, optional - Number of samples for a monte carlo estimate, defaults to 0 - mcerror: bool, optional - Produce a monte carlo error estimate - """ - if N == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") - return super().dimensionality(N, mcerror) - class ReducedLinearModel(object): """A model with no data. 
diff --git a/lsbi/model_1.py b/lsbi/model_1.py new file mode 100644 index 0000000..b127245 --- /dev/null +++ b/lsbi/model_1.py @@ -0,0 +1,825 @@ +"""Gaussian models for linear Bayesian inference.""" + +import copy + +import numpy as np +from numpy.linalg import inv, solve +from scipy.special import logsumexp + +from lsbi.stats import dkl, mixture_normal, multivariate_normal +from lsbi.utils import alias, dediagonalise, logdet + + +class LinearModel(object): + """A multilinear model. + + D|θ ~ N( m + M θ, C ) + θ ~ N( μ, Σ ) + + Defined by: + Parameters: θ (..., n,) + Data: D (..., d,) + Model: M (..., d, n) + Prior mean: μ (..., n,) + Prior covariance: Σ (..., n, n) + Data mean: m (..., d,) + Data covariance: C (..., d, d) + + where the ellipses indicate arbitrary (broadcastable) additional copies. + + Parameters + ---------- + M : array_like, optional + if ndim>=2: model matrices + if ndim==1: model matrix with vector diagonal for all components + if ndim==0: scalar * rectangular identity matrix for all components + Defaults to rectangular identity matrix + m : array_like, optional + if ndim>=1: data means + if ndim==0: scalar * unit vector for all components + Defaults to 0 for all components + C : array_like, optional + if ndim>=2: data covariances + if ndim==1: data covariance with vector diagonal for all components + if ndim==0: scalar * identity matrix for all components + Defaults to rectangular identity matrix + μ : (or mu) array_like, optional + if ndim>=1: prior means + if ndim==0: scalar * unit vector for all components + Defaults to 0 for all components + Prior mean, defaults to zero vector + Σ : (or Sigma) array_like, optional + if ndim>=2: prior covariances + if ndim==1: prior covariance with vector diagonal for all components + if ndim==0: scalar * identity matrix for all components + Defaults to k copies of identity matrices + n : int, optional + Number of parameters, defaults to automatically inferred value + d : int, optional + Number of data dimensions, defaults to automatically inferred value + shape : (), optional + Number of mixture components, defaults to automatically inferred value + """ + + def __init__(self, *args, **kwargs): + self.M = kwargs.pop("M", 1) + self.diagonal_M = kwargs.pop("diagonal_M", False) + if len(np.shape(self.M)) < 2: + self.diagonal_M = True + self.m = kwargs.pop("m", 0) + self.C = kwargs.pop("C", 1) + self.diagonal_C = kwargs.pop("diagonal_C", False) + if len(np.shape(self.C)) < 2: + self.diagonal_C = True + self.μ = kwargs.pop("μ", 0) + self.μ = kwargs.pop("mu", self.μ) + self.Σ = kwargs.pop("Σ", 1) + self.Σ = kwargs.pop("Sigma", self.Σ) + self.diagonal_Σ = kwargs.pop("diagonal_Σ", False) + self.diagonal_Σ = kwargs.pop("diagonal_Sigma", self.diagonal_Σ) + if len(np.shape(self.Σ)) < 2: + self.diagonal_Σ = True + self._shape = kwargs.pop("shape", ()) + self._n = kwargs.pop("n", 1) + self._d = kwargs.pop("d", 1) + + if kwargs: + raise ValueError(f"Unrecognised arguments: {kwargs}") + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + np.shape(self.M)[: -2 + self.diagonal_M], + np.shape(self.m)[:-1], + np.shape(self.C)[: -2 + self.diagonal_C], + np.shape(self.μ)[:-1], + np.shape(self.Σ)[: -2 + self.diagonal_Σ], + self._shape, + ) + + @property + def n(self): + """Dimension of the distribution.""" + return np.max( + [ + *np.shape(self.M)[len(np.shape(self.M)) - 1 + self.diagonal_M :], + *np.shape(self.Σ)[-2 + self.diagonal_Σ :], + *np.shape(self.μ)[-1:], + self._n, + ] + ) + + @property + def 
d(self): + """Dimensionality of data space len(D).""" + return np.max( + [ + *np.shape(self.M)[-2 + self.diagonal_M : -1], + *np.shape(self.C)[-2 + self.diagonal_C :], + *np.shape(self.m)[-1:], + self._d, + ] + ) + + def model(self, θ): + """Model matrix M(θ) for a given parameter vector. + + M(θ) = m + M θ + + Parameters + ---------- + θ : array_like, shape (..., n,) + """ + return self.m + np.einsum("...ja,...a->...j", self._M, θ * np.ones(self.n)) + + def likelihood(self, θ): + """P(D|θ) as a distribution object. + + D|θ ~ N( m + M θ, C ) + θ ~ N( μ, Σ ) + + Parameters + ---------- + θ : array_like, shape (k, n) + """ + μ = self.model(θ) + return multivariate_normal(μ, self.C, self.shape, self.d, self.diagonal_C) + + def prior(self): + """P(θ) as a distribution object. + + θ ~ N( μ, Σ ) + """ + return multivariate_normal(self.μ, self.Σ, self.shape, self.n, self.diagonal_Σ) + + def posterior(self, D): + """P(θ|D) as a distribution object. + + θ|D ~ N( μ + S M'C^{-1}(D - m - M μ), S ) + S = (Σ^{-1} + M'C^{-1}M)^{-1} + + Parameters + ---------- + D : array_like, shape (d,) + """ + values = D - self.model(self.μ) + + diagonal_Σ = self.diagonal_C and self.diagonal_Σ and self.diagonal_M + + if diagonal_Σ: + dim = min(self.n, self.d) + shape = np.broadcast_shapes(self.shape, values.shape[:-1]) + C = np.atleast_1d(self.C)[..., :dim] + M = np.atleast_1d(self.M)[..., :dim] + Σ = self.Σ * np.ones((*shape, self.n)) + Σ[..., :dim] = 1 / (1 / Σ[..., :dim] + M**2 / C) + + μ = self.μ * np.ones((*shape, self.n)) + μ[..., :dim] = μ[..., :dim] + Σ[..., :dim] * M / C * values[..., :dim] + else: + if self.diagonal_C: + invC = np.eye(self.d) / np.atleast_1d(self.C)[..., None, :] + else: + invC = inv(self.C) + + if self.diagonal_Σ: + invΣ = np.eye(self.n) / np.atleast_1d(self.Σ)[..., None, :] + else: + invΣ = inv(self.Σ) + + Σ = inv( + invΣ + np.einsum("...aj,...ab,...bk->...jk", self._M, invC, self._M) + ) + μ = self.μ + np.einsum( + "...ja,...ba,...bc,...c->...j", Σ, self._M, invC, values + ) + + return multivariate_normal(μ, Σ, self.shape, self.n, diagonal_Σ) + + def evidence(self): + """P(D) as a distribution object. + + D ~ N( m + M μ, C + M Σ M' ) + """ + diagonal_Σ = self.diagonal_C and self.diagonal_Σ and self.diagonal_M + if diagonal_Σ: + dim = min(self.n, self.d) + M = np.atleast_1d(self.M)[..., :dim] + S = np.atleast_1d(self.Σ)[..., :dim] + Σ = self.C * np.ones( + ( + *self.shape, + self.d, + ) + ) + Σ[..., :dim] = Σ[..., :dim] + S * M**2 + else: + Σ = self._C + np.einsum( + "...ja,...ab,...kb->...jk", self._M, self._Σ, self._M + ) + μ = self.model(self.μ) + return multivariate_normal(μ, Σ, self.shape, self.d, diagonal_Σ) + + def joint(self): + """P(θ, D) as a distribution object. + + [θ] ~ N( [ μ ] [ Σ Σ M' ] ) + [D] ( [m + M μ] , [M Σ C + M Σ M'] ) + """ + evidence = self.evidence() + prior = self.prior() + b = np.broadcast_to(prior.mean, self.shape + (self.n,)) + a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) + μ = np.block([b, a]) + A = dediagonalise(prior.cov, prior.diagonal, self.n) + A = np.broadcast_to(A, self.shape + (self.n, self.n)) + D = dediagonalise(evidence.cov, evidence.diagonal, self.d) + D = np.broadcast_to(D, self.shape + (self.d, self.d)) + C = np.einsum("...ja,...al->...jl", self._M, self._Σ) + C = np.broadcast_to(C, self.shape + (self.d, self.n)) + B = np.moveaxis(C, -1, -2) + Σ = np.block([[A, B], [C, D]]) + return multivariate_normal(μ, Σ, self.shape, self.n + self.d) + + def update(self, D, inplace=False): + """Bayesian update of the model with data. 
+ + Parameters + ---------- + D : array_like, shape (..., d) + """ + dist = copy.deepcopy(self) if not inplace else self + posterior = self.posterior(D) + dist.μ = posterior.mean + dist.Σ = posterior.cov + dist.diagonal_Σ = posterior.diagonal + if not inplace: + return dist + + def ppd(self, D0): + """P(D|D0) as a distribution object.""" + return self.update(D0).evidence() + + def dkl(self, D, n=0): + """KL divergence between the posterior and prior. + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the posterior + n : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + """ + return dkl(self.posterior(D), self.prior(), n) + + @property + def _M(self): + return dediagonalise(self.M, self.diagonal_M, self.d, self.n) + + @property + def _C(self): + return dediagonalise(self.C, self.diagonal_C, self.d) + + @property + def _Σ(self): + return dediagonalise(self.Σ, self.diagonal_Σ, self.n) + + def reduce(self, D): + """Reduce the model to a reduced model. + + Σ_L = (M' C^{-1} M)^{-1} + μ_L = Σ_L M' C^{-1} (D-m) + logLmax = - log|2 π C|/2 - (D-m)'C^{-1}(D-m)/2 + + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the reduced model + + Returns + ------- + ReducedLinearModel + """ + # TODO modify this to work with diagonal matrices + diagonal_Σ_L = self.diagonal_C and self.diagonal_M + if diagonal_Σ_L: + dim = min(self.n, self.d) + M = np.atleast_1d(self.M)[..., :dim] + C = np.atleast_1d(self.C)[..., :dim] + Σ_L = np.inf * np.ones((*shape, self.n)) + Σ_L[..., :dim] = C / M**2 + μ_L = np.zeros((*shape, self.n)) + μ_L = Σ_L[...:dim] * M / C * (D - self.m) + logLmax = ( + -logdet(2 * np.pi * C, self.diagonal_C) / 2 + - np.einsum("...a,...ab,...b->...", D - self.m, inv(C), D - self.m) / 2 + ) + else: + if self.diagonal_C: + invC = np.eye(self.d) / np.atleast_1d(self.C)[..., None, :] + else: + invC = inv(self.C) + Σ_L = inv(np.einsum("...ja,...ab,...kb->...jk", self._M, invC, self._M)) + μ_L = np.einsum( + "...ja,...ab,...bc,...c->...j", Σ_L, self._M, invC, D - self.m + ) + logLmax = ( + -logdet(2 * np.pi * self.C, self.diagonal_C) / 2 + - np.einsum("...a,...ab,...b->...", D - self.m, invC, D - self.m) / 2 + ) + return ReducedLinearModel( + μ=self.μ, + Σ=self.Σ, + diagonal_Σ=self.diagonal_Σ, + logLmax=logLmax, + μ_L=μ_L, + Σ_L=Σ_L, + n=self.n, + shape=self.shape, + ) + + +alias(LinearModel, "μ", "mu") +alias(LinearModel, "Σ", "Sigma") +alias(LinearModel, "diagonal_Σ", "diagonal_Sigma") + + +class MixtureModel(LinearModel): + """A linear mixture model. 
+ + D|θ, w ~ N( m + M θ, C ) + θ|w ~ N( μ, Σ ) + w ~ categorical( exp(logw) ) + + Defined by: + Parameters: θ (..., n,) + Data: D (..., d,) + Prior means: μ (..., k, n) + Prior covariances: Σ (..., k, n, n) + Data means: m (..., k, d) + Data covariances: C (..., k, d, d) + log mixture weights: logw (..., k,) + + Parameters + ---------- + M : array_like, optional + if ndim>=2: model matrices + if ndim==1: model matrix with vector diagonal for all components + if scalar: scalar * rectangular identity matrix for all components + Defaults to k copies of rectangular identity matrices + m : array_like, optional + if ndim>=1: data means + if scalar: scalar * unit vector for all components + Defaults to 0 for all components + C : array_like, optional + if ndim>=2: data covariances + if ndim==1: data covariance with vector diagonal for all components + if scalar: scalar * identity matrix for all components + Defaults to k copies of identity matrices + μ : array_like, optional + if ndim>=1: prior means + if scalar: scalar * unit vector for all components + Defaults to 0 for all components + Prior mean, defaults to zero vector + Σ : array_like, optional + if ndim>=2: prior covariances + if ndim==1: prior covariance with vector diagonal for all components + if scalar: scalar * identity matrix for all components + Defaults to k copies of identity matrices + logw : array_like, optional + if ndim>=1: log mixture weights + if scalar: scalar * unit vector + Defaults to uniform weights + n : int, optional + Number of parameters, defaults to automatically inferred value + d : int, optional + Number of data dimensions, defaults to automatically inferred value + """ + + def __init__(self, *args, **kwargs): + self.logw = kwargs.pop("logw", 0) + super().__init__(*args, **kwargs) + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes(np.shape(self.logw), super().shape) + + @property + def k(self): + """Number of mixture components.""" + if self.shape == (): + return 1 + return self.shape[-1] + + def likelihood(self, θ): + """P(D|θ) as a distribution object. + + D|θ,w ~ N( m + M θ, C ) + w|θ ~ categorical(...) + + Parameters + ---------- + θ : array_like, shape (n,) + """ + dist = super().likelihood(np.expand_dims(θ, -2)) + dist.__class__ = mixture_normal + prior = self.prior() + dist.logw = prior.logpdf(θ, broadcast=True, joint=True) + dist.logw = dist.logw - logsumexp(dist.logw, axis=-1, keepdims=True) + return dist + + def prior(self): + """P(θ) as a distribution object. + + θ|w ~ N( μ, Σ ) + w ~ categorical(exp(logw)) + """ + dist = super().prior() + dist.__class__ = mixture_normal + dist.logw = self.logw + return dist + + def posterior(self, D): + """P(θ|D) as a distribution object. + + θ|D, w ~ N( μ + S M'C^{-1}(D - m - M μ), S ) + w|D ~ P(D|w)P(w)/P(D) + S = (Σ^{-1} + M'C^{-1}M)^{-1} + + Parameters + ---------- + D : array_like, shape (d,) + """ + dist = super().posterior(np.expand_dims(D, -2)) + dist.__class__ = mixture_normal + evidence = self.evidence() + dist.logw = evidence.logpdf(D, broadcast=True, joint=True) + dist.logw = dist.logw - logsumexp(dist.logw, axis=-1, keepdims=True) + return dist + + def evidence(self): + """P(D) as a distribution object. + + D|w ~ N( m + M μ, C + M Σ M' ) + w ~ categorical(exp(logw)) + """ + dist = super().evidence() + dist.__class__ = mixture_normal + dist.logw = self.logw + return dist + + def joint(self): + """P(D, θ) as a distribution object. 
+ + [θ] | w ~ N( [ μ ] [ Σ Σ M' ] ) + [D] | ( [m + M μ] , [M Σ C + M Σ M'] ) + + w ~ categorical(exp(logw)) + """ + dist = super().joint() + dist.__class__ = mixture_normal + dist.logw = self.logw + return dist + + def update(self, D, inplace=False): + """Bayesian update of the model with data. + + Parameters + ---------- + D : array_like, shape (..., d) + """ + dist = copy.deepcopy(self) if not inplace else self + posterior = self.posterior(D) + dist.μ = posterior.mean + dist.Σ = posterior.cov + dist.diagonal_Σ = posterior.diagonal + dist.logw = posterior.logw + if not inplace: + return dist + + def dkl(self, D, n=0): + """KL divergence between the posterior and prior. + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the posterior + n : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + """ + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + + p = self.posterior(D) + q = self.prior() + x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) + + +class ReducedLinearModel(object): + """A model with no data. + + If a Likelihood is Gaussian in the parameters, it is sometimes more + clear/efficient to phrase it in terms of a parameter covariance, parameter + mean and peak value: + + log L(D|θ) = logLmax - (θ - μ_L)' Σ_L^{-1} (θ - μ_L)/2 + log π(θ) = - log|2 π Σ|/2 - (θ - μ)' Σ^{-1} (θ - μ)/2 + + log P(θ|D) = - log|2 π Σ_P|/2 - (θ - μ_P)' Σ_P^{-1} (θ - μ_P)/2 + log Z(D) = logLmax + log π(μ_L) - log P(μ_L) + + Σ_P^{-1} = Σ^{-1} + Σ_L^{-1} + Σ_P^{-1} μ_P = Σ^{-1} μ + Σ_L^{-1} μ_L + + We can link this to a data-based model with the relations: + + Sigma_L = (M' C^{-1} M)^{-1} + mu_L = Sigma_L M' C^{-1} (D-m) + logLmax = + - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 + + See the LinearModel.reduce method for an implementation of this + + Parameters + ---------- + mu_L : array_like + Likelihood peak + Sigma_L : array_like + Likelihood covariance + logLmax : float, optional + Likelihood maximum, defaults to zero + mmu_pi : array_like, optional + Prior mean, defaults to zero vector + Sigma_pi : array_like, optional + Prior covariance, defaults to identity matrix + """ + + def __init__(self, *args, **kwargs): + self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) + self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L", None)) + self.logLmax = kwargs.pop("logLmax", 0) + self.mu_pi = np.atleast_1d(kwargs.pop("mu_pi", np.zeros_like(self.mu_L))) + self.Sigma_pi = np.atleast_2d(kwargs.pop("Sigma_pi", np.eye(len(self.mu_pi)))) + self.Sigma_P = inv(inv(self.Sigma_pi) + inv(self.Sigma_L)) + self.mu_P = self.Sigma_P @ ( + solve(self.Sigma_pi, self.mu_pi) + solve(self.Sigma_L, self.mu_L) + ) + + def prior(self): + """P(θ) as a distribution object.""" + return multivariate_normal(self.μ, self.Σ, self.shape, self.n, self.diagonal_Σ) + + def posterior(self): + """P(θ|D) as a distribution object.""" + if diagonal: + Σ_P = 1 / (1 / self.Σ + 1 / self.Σ_L) + elif self.diagonal_Σ_L: + Σ_P = inv(inv(self.Σ) + np.eye(self.n) / self.Σ_L[..., None, :]) + elif self.diagonal_Σ: + Σ_P = inv(np.eye(self.n) / self.Σ[..., None, :] + inv(self.Σ_L)) + else: + Σ_P = inv(inv(self.Σ) + inv(self.Σ_L)) + + if self.diagonal_Σ_L: + x_L = self.μ_L / self.Σ_L + else: + x_L = np.einsum( + "...ij,...j->...i", inv(self.Σ_L), np.ones(self.n) * self.μ_L + ) + + if self.diagonal_Σ: + x = μ / self.Σ + else: + x = np.einsum("...ij,...j->...i", inv(self.Σ), 
np.ones(self.n) * self.μ) + + μ_P = np.einsum("...ij,...j->...i", Σ_P, x + x_L) + return multivariate_normal(μ_P, Σ_P, self.shape, self.n, diagonal) + + def logπ(self, θ): + """P(θ) as a scalar.""" + return self.prior().logpdf(θ) + + def logP(self, θ): + """P(θ|D) as a scalar.""" + return self.posterior().logpdf(θ) + + def logL(self, θ): + """P(D|θ) as a scalar.""" + return ( + self.logLmax + + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(θ) + + logdet(2 * np.pi * self.Sigma_L) / 2 + ) + + def logZ(self): + """P(D) as a scalar.""" + return ( + self.logLmax + + logdet(self.Sigma_P) / 2 + - logdet(self.Sigma_pi) / 2 + - (self.mu_P - self.mu_pi) + @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) + / 2 + - (self.mu_P - self.mu_L) @ solve(self.Sigma_L, self.mu_P - self.mu_L) / 2 + ) + + def DKL(self): + """D_KL(P(θ|D)||P(θ)) the Kullback-Leibler divergence.""" + return ( + logdet(self.Sigma_pi) + - logdet(self.Sigma_P) + + np.trace(inv(self.Sigma_pi) @ self.Sigma_P - 1) + + (self.mu_P - self.mu_pi) @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) + ) / 2 + + +class ReducedLinearModelUniformPrior(object): + """A model with no data. + + Gaussian likelihood in the parameters + + logL(θ) = logLmax - (θ - mu_L)' Sigma_L^{-1} (θ - mu_L) + + Uniform prior + + We can link this to a data-based model with the relations: + + Sigma_L = (M' C^{-1} M)^{-1} + mu_L = Sigma_L M' C^{-1} (D-m) + logLmax = + -log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 + + Parameters + ---------- + mu_L : array_like + Likelihood peak + Sigma_L : array_like + Likelihood covariance + logLmax : float, optional + Likelihood maximum, defaults to zero + logV : float, optional + log prior volume, defaults to zero + """ + + def __init__(self, *args, **kwargs): + self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) + self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L")) + self.logLmax = kwargs.pop("logLmax", 0) + self.logV = kwargs.pop("logV", 0) + self.Sigma_P = self.Sigma_L + self.mu_P = self.mu_L + + def posterior(self): + """P(θ|D) as a distribution object.""" + return multivariate_normal( + self.μ_L, self.Σ_L, self.shape, self.n, self.diagonal + ) + + def logpi(self, θ): + """P(θ) as a scalar.""" + return -self.logV + + def logP(self, θ): + """P(θ|D) as a scalar.""" + return self.posterior().logpdf(θ) + + def logL(self, θ): + """P(D|θ) as a scalar.""" + dist = multivariate_normal( + self.μ_L, self.Σ_L, self.shape, self.n, self.diagonal + ) + return self.logLmax + dist.logpdf(θ) - dist.logpdf(dist.μ_L) + + def logZ(self): + """P(D) as a scalar.""" + return self.logLmax + logdet(2 * np.pi * self.Sigma_P) / 2 - self.logV + + def DKL(self): + """D_KL(P(θ|D)||P(θ)) the Kullback-Leibler divergence.""" + dkl = np.ones(self.shape) * self.logV + dkl -= logdet(2 * np.pi * np.e * self.Σ_L, self.diagonal) / 2 + return dkl + + +class ReducedMixtureModel(ReducedLinearModel): + """A model with no data. 
+ + Gaussian likelihood in the parameters + + logL(θ) = logLmax - (θ - μ_L)' Σ_L^{-1} (θ - μ_L) + + We can link this to a data-based model with the relations: + + Σ_L = (M' C^{-1} M)^{-1} + μ_L = Σ_L M' C^{-1} (D-m) + logLmax = + - log|2 π C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 + + Parameters + ---------- + μ_L : array_like + Likelihood peak + Σ_L : array_like + Likelihood covariance + logLmax : float, optional + Likelihood maximum, defaults to zero + logw : array_like, optional + log mixture weights + if ndim>=1: log mixture weights + if scalar: scalar * unit vector + Defaults to uniform weights + logw_L: array_like, optional + log mixture weights for the likelihood + if ndim>=1: log mixture weights + if scalar: scalar * unit vector + Defaults to uniform weights + n : int, optional + Number of parameters, defaults to automatically inferred value + """ + + def __init__(self, *args, **kwargs): + self.logw = kwargs.pop("logw", 0) + self.logw_L = kwargs.pop("logw_L", 0) + super().__init__(*args, **kwargs) + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes( + np.shape(self.logw), np.shape(self.logw_L), super().shape + ) + + @property + def k(self): + """Number of mixture components of the distribution.""" + return self.shape[-1] + + def prior(self): + """P(θ) as a scipy distribution object.""" + dist = super().prior() + dist.__class__ = mixture_normal + dist.logw = self.logw + return dist + + def posterior(self): + """P(θ|D) as a scipy distribution object.""" + dist = super().posterior() + dist.__class__ = mixture_normal + dist.logw = dist.logw + self.logw_L + return dist + + def logL(self, θ): + """P(D|θ) as a scalar.""" + dist = super().likelihood(θ) + dist.__class__ = mixture + dist.logw = self.logw + pass + + +class ReducedMixtureModelUniformPrior(ReducedLinearModelUniformPrior): + """Fill in docstring.""" + + def __init__(self, *args, **kwargs): + self.logw_L = kwargs.pop("logw_L", 0) + super().__init__(*args, **kwargs) + + @property + def shape(self): + """Shape of the distribution.""" + return np.broadcast_shapes(np.shape(self.logw_L), super().shape) + + @property + def k(self): + """Number of mixture components of the distribution.""" + return self.shape[-1] + + def posterior(self): + """P(θ|D) as a scipy distribution object.""" + dist = super().posterior() + dist.__class__ = mixture_normal + dist.logw = self.logw_L + return dist + + def logπ(self, θ): + """P(θ) as a scalar.""" + return -self.logV + + def logP(self, θ): + """P(θ|D) as a scalar.""" + return self.posterior().logpdf(θ) + + def logL(self, θ): + """P(D|θ) as a scalar.""" + pass + + def logZ(self): + """To be implemented.""" + pass + + def dkl(self): + """To be implemented.""" + pass diff --git a/lsbi/stats.py b/lsbi/stats.py index e6fc8a2..562db6c 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -585,22 +585,15 @@ def __getitem__(self, arg): # noqa: D105 return dist -def dkl(p, q, N=0, mcerror=False): - r"""Kullback-Leibler divergence between two distributions. - - if P ~ N(p,P) and Q ~ N(q,Q) then - - D_KL(P\||Q) = _P = - 1/2 * (log(\|Q|/\|P|) - d + tr(Q^{-1} P) + (q - p)' Q^{-1} (q - p)) +def dkl(p, q, n=0): + """Kullback-Leibler divergence between two distributions. Parameters ---------- p : lsbi.stats.multivariate_normal q : lsbi.stats.multivariate_normal - N : int, optional, default=0 + n : int, optional, default=0 Number of samples to mcmc estimate the divergence. 
- mcerror: bool, optional, default=False - Produce a Monte Carlo error estimate Returns ------- @@ -608,15 +601,9 @@ def dkl(p, q, N=0, mcerror=False): Kullback-Leibler divergence between p and q. """ shape = np.broadcast_shapes(p.shape, q.shape) - if N: - x = p.rvs(size=(N, *shape), broadcast=True) - logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) - ans = logR.mean(axis=0) - if mcerror: - err = (logR.var(axis=0) / (N - 1)) ** 0.5 - ans = (ans, err) - return ans - + if n: + x = p.rvs(size=(n, *shape), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) dkl = -p.dim * np.ones(shape) dkl = dkl + logdet(q.cov * np.ones(q.dim), q.diagonal) dkl = dkl - logdet(p.cov * np.ones(p.dim), p.diagonal) @@ -636,73 +623,3 @@ def dkl(p, q, N=0, mcerror=False): dkl = dkl + np.einsum("...ij,...ji->...", invq, p.cov) return dkl / 2 - - -def bmd(p, q, N=0, mcerror=False): - """Bayesian model dimensionality between two distributions. - - if P ~ N(p,P) and Q ~ N(q,Q) then - - bmd/2 = var(log P/Q)_P - = 1/2 tr(Q^{-1} P Q^{-1} P) - 1/2 (tr(Q^{-1} P))^2 - + (q - p)' Q^{-1} P Q^{-1} (q - p) + d/2 - - Parameters - ---------- - p : lsbi.stats.multivariate_normal - q : lsbi.stats.multivariate_normal - N : int, optional, default=0 - Number of samples to mcmc estimate the divergence. - mcerror: bool, optional, default=False - Produce a Monte Carlo error estimate - - Returns - ------- - bmd : array_like - Bayesian model dimensionality between p and q. - """ - shape = np.broadcast_shapes(p.shape, q.shape) - if N: - x = p.rvs(size=(N, *shape), broadcast=True) - logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) - ans = logR.var(axis=0) * 2 - if mcerror: - err = ans * (2 / N - 1) ** 0.5 - ans = (ans, err) - return ans - - bmd = p.dim / 2 * np.ones(shape) - pq = (p.mean - q.mean) * np.ones(p.dim) - if p.diagonal and q.diagonal: - pcov = p.cov * np.ones(p.dim) - qcov = q.cov * np.ones(q.dim) - bmd = bmd + (pq**2 * pcov / qcov**2).sum(axis=-1) - bmd = bmd + (pcov**2 / qcov**2).sum(axis=-1) / 2 - bmd = bmd - (pcov / qcov).sum(axis=-1) - elif p.diagonal: - invq = inv(q.cov) - pcov = p.cov * np.ones(p.dim) - bmd = bmd + np.einsum( - "...i,...ij,...j,...jl,...l->...", pq, invq, pcov, invq, pq - ) - bmd = bmd - np.einsum("...jj,...j->...", invq, pcov) - bmd = bmd + np.einsum("...lj,...j,...jl,...l->...", invq, pcov, invq, pcov) / 2 - elif q.diagonal: - invq = np.ones(q.dim) / q.cov - pcov = p.cov * np.ones(p.dim) - bmd = bmd + np.einsum( - "...i,...i,...ik,...k,...k->...", pq, invq, pcov, invq, pq - ) - bmd = bmd - np.einsum("...j,...jj->...", invq, pcov) - bmd = bmd + np.einsum("...j,...jk,...k,...kj->...", invq, pcov, invq, pcov) / 2 - else: - invq = inv(q.cov) - bmd = bmd + np.einsum( - "...i,...ij,...jk,...kl,...l->...", pq, invq, p.cov, invq, pq - ) - bmd = bmd - np.einsum("...ij,...ji->...", invq, p.cov) - bmd = ( - bmd - + np.einsum("...ij,...jk,...kl,...li->...", invq, p.cov, invq, p.cov) / 2 - ) - return bmd * 2 diff --git a/tests/test_model.py b/tests/test_model.py index 1615933..8d85a72 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -265,27 +265,6 @@ def test_posterior( assert dkl.shape == model.shape assert (dkl >= 0).all() - bmd = model.bmd(D) - assert bmd.shape == model.shape - assert (bmd >= 0).all() - - mutual_information = model.mutual_information() - assert mutual_information.shape == model.shape - mutual_information.shape - assert (mutual_information >= 0).all() - - mutual_information_mc, err = model.mutual_information(N, 
True) - assert mutual_information_mc.shape == model.shape - assert_allclose((mutual_information - mutual_information_mc) / err, 0, atol=10) - - dimensionality = model.dimensionality() - assert dimensionality.shape == model.shape - assert (dimensionality >= 0).all() - - dimensionality_mc, err = model.dimensionality(N, True) - assert dimensionality_mc.shape == model.shape - assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=10) - def test_evidence( self, M_shape, @@ -641,24 +620,6 @@ def test_posterior( dkl = model.dkl(D, 10) assert dkl.shape == model.shape[:-1] - with pytest.raises(ValueError): - model.bmd(D) - - bmd = model.bmd(D, 10) - assert bmd.shape == model.shape[:-1] - - with pytest.raises(ValueError): - model.mutual_information() - - mutual_information = model.mutual_information(10) - assert mutual_information.shape == model.shape[:-1] - - with pytest.raises(ValueError): - model.dimensionality() - - dimensionality = model.dimensionality(10) - assert dimensionality.shape == model.shape[:-1] - def test_evidence( self, logw_shape, diff --git a/tests/test_stats.py b/tests/test_stats.py index d054fab..6f957a4 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -5,7 +5,7 @@ from scipy.special import logsumexp from scipy.stats import multivariate_normal as scipy_multivariate_normal -from lsbi.stats import bmd, dkl, mixture_normal, multivariate_normal +from lsbi.stats import dkl, mixture_normal, multivariate_normal shapes = [(2, 3), (3,), ()] sizes = [(6, 5), (5,), ()] @@ -563,7 +563,6 @@ def test_dkl( cov_shape_q, diagonal_q, ): - dim = dim_p p = TestMultivariateNormal().random( dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p ) @@ -579,42 +578,7 @@ def test_dkl( assert (dkl_pq >= 0).all() assert dkl_pq.shape == np.broadcast_shapes(p.shape, q.shape) - dkl_mc, err = dkl(p, q, 1000, True) + dkl_mc = dkl(p, q, 1000) assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose((dkl_pq - dkl_mc) / err, 0, atol=10) - - -@pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) -@pytest.mark.parametrize("dim_q, shape_q, mean_shape_q, cov_shape_q, diagonal_q", tests) -def test_bmd( - dim_p, - shape_p, - mean_shape_p, - cov_shape_p, - diagonal_p, - dim_q, - shape_q, - mean_shape_q, - cov_shape_q, - diagonal_q, -): - dim = dim_p - p = TestMultivariateNormal().random( - dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p - ) - q = TestMultivariateNormal().random( - dim, shape_q, mean_shape_q, cov_shape_q, diagonal_q - ) - - bmd_pq = bmd(p, q) - - assert_allclose(bmd(p, p), 0, atol=1e-10) - assert_allclose(bmd(q, q), 0, atol=1e-10) - - assert (bmd_pq >= 0).all() - assert bmd_pq.shape == np.broadcast_shapes(p.shape, q.shape) - - bmd_mc, err = bmd(p, q, 1000, True) - assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose((bmd_pq - bmd_mc) / err, 0, atol=10) + assert_allclose(dkl_pq, dkl_mc, atol=1) From 1293df3cc373777dc49872a3820588c97b3e69eb Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 25 Jul 2024 14:33:13 +0100 Subject: [PATCH 116/117] removed temporary file --- lsbi/model_1.py | 825 ------------------------------------------------ 1 file changed, 825 deletions(-) delete mode 100644 lsbi/model_1.py diff --git a/lsbi/model_1.py b/lsbi/model_1.py deleted file mode 100644 index b127245..0000000 --- a/lsbi/model_1.py +++ /dev/null @@ -1,825 +0,0 @@ -"""Gaussian models for linear Bayesian inference.""" - -import copy - -import numpy as np -from numpy.linalg import inv, solve -from scipy.special 
import logsumexp - -from lsbi.stats import dkl, mixture_normal, multivariate_normal -from lsbi.utils import alias, dediagonalise, logdet - - -class LinearModel(object): - """A multilinear model. - - D|θ ~ N( m + M θ, C ) - θ ~ N( μ, Σ ) - - Defined by: - Parameters: θ (..., n,) - Data: D (..., d,) - Model: M (..., d, n) - Prior mean: μ (..., n,) - Prior covariance: Σ (..., n, n) - Data mean: m (..., d,) - Data covariance: C (..., d, d) - - where the ellipses indicate arbitrary (broadcastable) additional copies. - - Parameters - ---------- - M : array_like, optional - if ndim>=2: model matrices - if ndim==1: model matrix with vector diagonal for all components - if ndim==0: scalar * rectangular identity matrix for all components - Defaults to rectangular identity matrix - m : array_like, optional - if ndim>=1: data means - if ndim==0: scalar * unit vector for all components - Defaults to 0 for all components - C : array_like, optional - if ndim>=2: data covariances - if ndim==1: data covariance with vector diagonal for all components - if ndim==0: scalar * identity matrix for all components - Defaults to rectangular identity matrix - μ : (or mu) array_like, optional - if ndim>=1: prior means - if ndim==0: scalar * unit vector for all components - Defaults to 0 for all components - Prior mean, defaults to zero vector - Σ : (or Sigma) array_like, optional - if ndim>=2: prior covariances - if ndim==1: prior covariance with vector diagonal for all components - if ndim==0: scalar * identity matrix for all components - Defaults to k copies of identity matrices - n : int, optional - Number of parameters, defaults to automatically inferred value - d : int, optional - Number of data dimensions, defaults to automatically inferred value - shape : (), optional - Number of mixture components, defaults to automatically inferred value - """ - - def __init__(self, *args, **kwargs): - self.M = kwargs.pop("M", 1) - self.diagonal_M = kwargs.pop("diagonal_M", False) - if len(np.shape(self.M)) < 2: - self.diagonal_M = True - self.m = kwargs.pop("m", 0) - self.C = kwargs.pop("C", 1) - self.diagonal_C = kwargs.pop("diagonal_C", False) - if len(np.shape(self.C)) < 2: - self.diagonal_C = True - self.μ = kwargs.pop("μ", 0) - self.μ = kwargs.pop("mu", self.μ) - self.Σ = kwargs.pop("Σ", 1) - self.Σ = kwargs.pop("Sigma", self.Σ) - self.diagonal_Σ = kwargs.pop("diagonal_Σ", False) - self.diagonal_Σ = kwargs.pop("diagonal_Sigma", self.diagonal_Σ) - if len(np.shape(self.Σ)) < 2: - self.diagonal_Σ = True - self._shape = kwargs.pop("shape", ()) - self._n = kwargs.pop("n", 1) - self._d = kwargs.pop("d", 1) - - if kwargs: - raise ValueError(f"Unrecognised arguments: {kwargs}") - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes( - np.shape(self.M)[: -2 + self.diagonal_M], - np.shape(self.m)[:-1], - np.shape(self.C)[: -2 + self.diagonal_C], - np.shape(self.μ)[:-1], - np.shape(self.Σ)[: -2 + self.diagonal_Σ], - self._shape, - ) - - @property - def n(self): - """Dimension of the distribution.""" - return np.max( - [ - *np.shape(self.M)[len(np.shape(self.M)) - 1 + self.diagonal_M :], - *np.shape(self.Σ)[-2 + self.diagonal_Σ :], - *np.shape(self.μ)[-1:], - self._n, - ] - ) - - @property - def d(self): - """Dimensionality of data space len(D).""" - return np.max( - [ - *np.shape(self.M)[-2 + self.diagonal_M : -1], - *np.shape(self.C)[-2 + self.diagonal_C :], - *np.shape(self.m)[-1:], - self._d, - ] - ) - - def model(self, θ): - """Model matrix M(θ) for a given parameter vector. 
- - M(θ) = m + M θ - - Parameters - ---------- - θ : array_like, shape (..., n,) - """ - return self.m + np.einsum("...ja,...a->...j", self._M, θ * np.ones(self.n)) - - def likelihood(self, θ): - """P(D|θ) as a distribution object. - - D|θ ~ N( m + M θ, C ) - θ ~ N( μ, Σ ) - - Parameters - ---------- - θ : array_like, shape (k, n) - """ - μ = self.model(θ) - return multivariate_normal(μ, self.C, self.shape, self.d, self.diagonal_C) - - def prior(self): - """P(θ) as a distribution object. - - θ ~ N( μ, Σ ) - """ - return multivariate_normal(self.μ, self.Σ, self.shape, self.n, self.diagonal_Σ) - - def posterior(self, D): - """P(θ|D) as a distribution object. - - θ|D ~ N( μ + S M'C^{-1}(D - m - M μ), S ) - S = (Σ^{-1} + M'C^{-1}M)^{-1} - - Parameters - ---------- - D : array_like, shape (d,) - """ - values = D - self.model(self.μ) - - diagonal_Σ = self.diagonal_C and self.diagonal_Σ and self.diagonal_M - - if diagonal_Σ: - dim = min(self.n, self.d) - shape = np.broadcast_shapes(self.shape, values.shape[:-1]) - C = np.atleast_1d(self.C)[..., :dim] - M = np.atleast_1d(self.M)[..., :dim] - Σ = self.Σ * np.ones((*shape, self.n)) - Σ[..., :dim] = 1 / (1 / Σ[..., :dim] + M**2 / C) - - μ = self.μ * np.ones((*shape, self.n)) - μ[..., :dim] = μ[..., :dim] + Σ[..., :dim] * M / C * values[..., :dim] - else: - if self.diagonal_C: - invC = np.eye(self.d) / np.atleast_1d(self.C)[..., None, :] - else: - invC = inv(self.C) - - if self.diagonal_Σ: - invΣ = np.eye(self.n) / np.atleast_1d(self.Σ)[..., None, :] - else: - invΣ = inv(self.Σ) - - Σ = inv( - invΣ + np.einsum("...aj,...ab,...bk->...jk", self._M, invC, self._M) - ) - μ = self.μ + np.einsum( - "...ja,...ba,...bc,...c->...j", Σ, self._M, invC, values - ) - - return multivariate_normal(μ, Σ, self.shape, self.n, diagonal_Σ) - - def evidence(self): - """P(D) as a distribution object. - - D ~ N( m + M μ, C + M Σ M' ) - """ - diagonal_Σ = self.diagonal_C and self.diagonal_Σ and self.diagonal_M - if diagonal_Σ: - dim = min(self.n, self.d) - M = np.atleast_1d(self.M)[..., :dim] - S = np.atleast_1d(self.Σ)[..., :dim] - Σ = self.C * np.ones( - ( - *self.shape, - self.d, - ) - ) - Σ[..., :dim] = Σ[..., :dim] + S * M**2 - else: - Σ = self._C + np.einsum( - "...ja,...ab,...kb->...jk", self._M, self._Σ, self._M - ) - μ = self.model(self.μ) - return multivariate_normal(μ, Σ, self.shape, self.d, diagonal_Σ) - - def joint(self): - """P(θ, D) as a distribution object. - - [θ] ~ N( [ μ ] [ Σ Σ M' ] ) - [D] ( [m + M μ] , [M Σ C + M Σ M'] ) - """ - evidence = self.evidence() - prior = self.prior() - b = np.broadcast_to(prior.mean, self.shape + (self.n,)) - a = np.broadcast_to(evidence.mean, self.shape + (self.d,)) - μ = np.block([b, a]) - A = dediagonalise(prior.cov, prior.diagonal, self.n) - A = np.broadcast_to(A, self.shape + (self.n, self.n)) - D = dediagonalise(evidence.cov, evidence.diagonal, self.d) - D = np.broadcast_to(D, self.shape + (self.d, self.d)) - C = np.einsum("...ja,...al->...jl", self._M, self._Σ) - C = np.broadcast_to(C, self.shape + (self.d, self.n)) - B = np.moveaxis(C, -1, -2) - Σ = np.block([[A, B], [C, D]]) - return multivariate_normal(μ, Σ, self.shape, self.n + self.d) - - def update(self, D, inplace=False): - """Bayesian update of the model with data. 
- - Parameters - ---------- - D : array_like, shape (..., d) - """ - dist = copy.deepcopy(self) if not inplace else self - posterior = self.posterior(D) - dist.μ = posterior.mean - dist.Σ = posterior.cov - dist.diagonal_Σ = posterior.diagonal - if not inplace: - return dist - - def ppd(self, D0): - """P(D|D0) as a distribution object.""" - return self.update(D0).evidence() - - def dkl(self, D, n=0): - """KL divergence between the posterior and prior. - - Parameters - ---------- - D : array_like, shape (..., d) - Data to form the posterior - n : int, optional - Number of samples for a monte carlo estimate, defaults to 0 - """ - return dkl(self.posterior(D), self.prior(), n) - - @property - def _M(self): - return dediagonalise(self.M, self.diagonal_M, self.d, self.n) - - @property - def _C(self): - return dediagonalise(self.C, self.diagonal_C, self.d) - - @property - def _Σ(self): - return dediagonalise(self.Σ, self.diagonal_Σ, self.n) - - def reduce(self, D): - """Reduce the model to a reduced model. - - Σ_L = (M' C^{-1} M)^{-1} - μ_L = Σ_L M' C^{-1} (D-m) - logLmax = - log|2 π C|/2 - (D-m)'C^{-1}(D-m)/2 - - - Parameters - ---------- - D : array_like, shape (..., d) - Data to form the reduced model - - Returns - ------- - ReducedLinearModel - """ - # TODO modify this to work with diagonal matrices - diagonal_Σ_L = self.diagonal_C and self.diagonal_M - if diagonal_Σ_L: - dim = min(self.n, self.d) - M = np.atleast_1d(self.M)[..., :dim] - C = np.atleast_1d(self.C)[..., :dim] - Σ_L = np.inf * np.ones((*shape, self.n)) - Σ_L[..., :dim] = C / M**2 - μ_L = np.zeros((*shape, self.n)) - μ_L = Σ_L[...:dim] * M / C * (D - self.m) - logLmax = ( - -logdet(2 * np.pi * C, self.diagonal_C) / 2 - - np.einsum("...a,...ab,...b->...", D - self.m, inv(C), D - self.m) / 2 - ) - else: - if self.diagonal_C: - invC = np.eye(self.d) / np.atleast_1d(self.C)[..., None, :] - else: - invC = inv(self.C) - Σ_L = inv(np.einsum("...ja,...ab,...kb->...jk", self._M, invC, self._M)) - μ_L = np.einsum( - "...ja,...ab,...bc,...c->...j", Σ_L, self._M, invC, D - self.m - ) - logLmax = ( - -logdet(2 * np.pi * self.C, self.diagonal_C) / 2 - - np.einsum("...a,...ab,...b->...", D - self.m, invC, D - self.m) / 2 - ) - return ReducedLinearModel( - μ=self.μ, - Σ=self.Σ, - diagonal_Σ=self.diagonal_Σ, - logLmax=logLmax, - μ_L=μ_L, - Σ_L=Σ_L, - n=self.n, - shape=self.shape, - ) - - -alias(LinearModel, "μ", "mu") -alias(LinearModel, "Σ", "Sigma") -alias(LinearModel, "diagonal_Σ", "diagonal_Sigma") - - -class MixtureModel(LinearModel): - """A linear mixture model. 
- - D|θ, w ~ N( m + M θ, C ) - θ|w ~ N( μ, Σ ) - w ~ categorical( exp(logw) ) - - Defined by: - Parameters: θ (..., n,) - Data: D (..., d,) - Prior means: μ (..., k, n) - Prior covariances: Σ (..., k, n, n) - Data means: m (..., k, d) - Data covariances: C (..., k, d, d) - log mixture weights: logw (..., k,) - - Parameters - ---------- - M : array_like, optional - if ndim>=2: model matrices - if ndim==1: model matrix with vector diagonal for all components - if scalar: scalar * rectangular identity matrix for all components - Defaults to k copies of rectangular identity matrices - m : array_like, optional - if ndim>=1: data means - if scalar: scalar * unit vector for all components - Defaults to 0 for all components - C : array_like, optional - if ndim>=2: data covariances - if ndim==1: data covariance with vector diagonal for all components - if scalar: scalar * identity matrix for all components - Defaults to k copies of identity matrices - μ : array_like, optional - if ndim>=1: prior means - if scalar: scalar * unit vector for all components - Defaults to 0 for all components - Prior mean, defaults to zero vector - Σ : array_like, optional - if ndim>=2: prior covariances - if ndim==1: prior covariance with vector diagonal for all components - if scalar: scalar * identity matrix for all components - Defaults to k copies of identity matrices - logw : array_like, optional - if ndim>=1: log mixture weights - if scalar: scalar * unit vector - Defaults to uniform weights - n : int, optional - Number of parameters, defaults to automatically inferred value - d : int, optional - Number of data dimensions, defaults to automatically inferred value - """ - - def __init__(self, *args, **kwargs): - self.logw = kwargs.pop("logw", 0) - super().__init__(*args, **kwargs) - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes(np.shape(self.logw), super().shape) - - @property - def k(self): - """Number of mixture components.""" - if self.shape == (): - return 1 - return self.shape[-1] - - def likelihood(self, θ): - """P(D|θ) as a distribution object. - - D|θ,w ~ N( m + M θ, C ) - w|θ ~ categorical(...) - - Parameters - ---------- - θ : array_like, shape (n,) - """ - dist = super().likelihood(np.expand_dims(θ, -2)) - dist.__class__ = mixture_normal - prior = self.prior() - dist.logw = prior.logpdf(θ, broadcast=True, joint=True) - dist.logw = dist.logw - logsumexp(dist.logw, axis=-1, keepdims=True) - return dist - - def prior(self): - """P(θ) as a distribution object. - - θ|w ~ N( μ, Σ ) - w ~ categorical(exp(logw)) - """ - dist = super().prior() - dist.__class__ = mixture_normal - dist.logw = self.logw - return dist - - def posterior(self, D): - """P(θ|D) as a distribution object. - - θ|D, w ~ N( μ + S M'C^{-1}(D - m - M μ), S ) - w|D ~ P(D|w)P(w)/P(D) - S = (Σ^{-1} + M'C^{-1}M)^{-1} - - Parameters - ---------- - D : array_like, shape (d,) - """ - dist = super().posterior(np.expand_dims(D, -2)) - dist.__class__ = mixture_normal - evidence = self.evidence() - dist.logw = evidence.logpdf(D, broadcast=True, joint=True) - dist.logw = dist.logw - logsumexp(dist.logw, axis=-1, keepdims=True) - return dist - - def evidence(self): - """P(D) as a distribution object. - - D|w ~ N( m + M μ, C + M Σ M' ) - w ~ categorical(exp(logw)) - """ - dist = super().evidence() - dist.__class__ = mixture_normal - dist.logw = self.logw - return dist - - def joint(self): - """P(D, θ) as a distribution object. 
- - [θ] | w ~ N( [ μ ] [ Σ Σ M' ] ) - [D] | ( [m + M μ] , [M Σ C + M Σ M'] ) - - w ~ categorical(exp(logw)) - """ - dist = super().joint() - dist.__class__ = mixture_normal - dist.logw = self.logw - return dist - - def update(self, D, inplace=False): - """Bayesian update of the model with data. - - Parameters - ---------- - D : array_like, shape (..., d) - """ - dist = copy.deepcopy(self) if not inplace else self - posterior = self.posterior(D) - dist.μ = posterior.mean - dist.Σ = posterior.cov - dist.diagonal_Σ = posterior.diagonal - dist.logw = posterior.logw - if not inplace: - return dist - - def dkl(self, D, n=0): - """KL divergence between the posterior and prior. - - Parameters - ---------- - D : array_like, shape (..., d) - Data to form the posterior - n : int, optional - Number of samples for a monte carlo estimate, defaults to 0 - """ - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") - - p = self.posterior(D) - q = self.prior() - x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) - return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) - - -class ReducedLinearModel(object): - """A model with no data. - - If a Likelihood is Gaussian in the parameters, it is sometimes more - clear/efficient to phrase it in terms of a parameter covariance, parameter - mean and peak value: - - log L(D|θ) = logLmax - (θ - μ_L)' Σ_L^{-1} (θ - μ_L)/2 - log π(θ) = - log|2 π Σ|/2 - (θ - μ)' Σ^{-1} (θ - μ)/2 - - log P(θ|D) = - log|2 π Σ_P|/2 - (θ - μ_P)' Σ_P^{-1} (θ - μ_P)/2 - log Z(D) = logLmax + log π(μ_L) - log P(μ_L) - - Σ_P^{-1} = Σ^{-1} + Σ_L^{-1} - Σ_P^{-1} μ_P = Σ^{-1} μ + Σ_L^{-1} μ_L - - We can link this to a data-based model with the relations: - - Sigma_L = (M' C^{-1} M)^{-1} - mu_L = Sigma_L M' C^{-1} (D-m) - logLmax = - - log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 - - See the LinearModel.reduce method for an implementation of this - - Parameters - ---------- - mu_L : array_like - Likelihood peak - Sigma_L : array_like - Likelihood covariance - logLmax : float, optional - Likelihood maximum, defaults to zero - mmu_pi : array_like, optional - Prior mean, defaults to zero vector - Sigma_pi : array_like, optional - Prior covariance, defaults to identity matrix - """ - - def __init__(self, *args, **kwargs): - self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) - self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L", None)) - self.logLmax = kwargs.pop("logLmax", 0) - self.mu_pi = np.atleast_1d(kwargs.pop("mu_pi", np.zeros_like(self.mu_L))) - self.Sigma_pi = np.atleast_2d(kwargs.pop("Sigma_pi", np.eye(len(self.mu_pi)))) - self.Sigma_P = inv(inv(self.Sigma_pi) + inv(self.Sigma_L)) - self.mu_P = self.Sigma_P @ ( - solve(self.Sigma_pi, self.mu_pi) + solve(self.Sigma_L, self.mu_L) - ) - - def prior(self): - """P(θ) as a distribution object.""" - return multivariate_normal(self.μ, self.Σ, self.shape, self.n, self.diagonal_Σ) - - def posterior(self): - """P(θ|D) as a distribution object.""" - if diagonal: - Σ_P = 1 / (1 / self.Σ + 1 / self.Σ_L) - elif self.diagonal_Σ_L: - Σ_P = inv(inv(self.Σ) + np.eye(self.n) / self.Σ_L[..., None, :]) - elif self.diagonal_Σ: - Σ_P = inv(np.eye(self.n) / self.Σ[..., None, :] + inv(self.Σ_L)) - else: - Σ_P = inv(inv(self.Σ) + inv(self.Σ_L)) - - if self.diagonal_Σ_L: - x_L = self.μ_L / self.Σ_L - else: - x_L = np.einsum( - "...ij,...j->...i", inv(self.Σ_L), np.ones(self.n) * self.μ_L - ) - - if self.diagonal_Σ: - x = μ / self.Σ - else: - x = np.einsum("...ij,...j->...i", inv(self.Σ), 
np.ones(self.n) * self.μ) - - μ_P = np.einsum("...ij,...j->...i", Σ_P, x + x_L) - return multivariate_normal(μ_P, Σ_P, self.shape, self.n, diagonal) - - def logπ(self, θ): - """P(θ) as a scalar.""" - return self.prior().logpdf(θ) - - def logP(self, θ): - """P(θ|D) as a scalar.""" - return self.posterior().logpdf(θ) - - def logL(self, θ): - """P(D|θ) as a scalar.""" - return ( - self.logLmax - + multivariate_normal(self.mu_L, self.Sigma_L).logpdf(θ) - + logdet(2 * np.pi * self.Sigma_L) / 2 - ) - - def logZ(self): - """P(D) as a scalar.""" - return ( - self.logLmax - + logdet(self.Sigma_P) / 2 - - logdet(self.Sigma_pi) / 2 - - (self.mu_P - self.mu_pi) - @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) - / 2 - - (self.mu_P - self.mu_L) @ solve(self.Sigma_L, self.mu_P - self.mu_L) / 2 - ) - - def DKL(self): - """D_KL(P(θ|D)||P(θ)) the Kullback-Leibler divergence.""" - return ( - logdet(self.Sigma_pi) - - logdet(self.Sigma_P) - + np.trace(inv(self.Sigma_pi) @ self.Sigma_P - 1) - + (self.mu_P - self.mu_pi) @ solve(self.Sigma_pi, self.mu_P - self.mu_pi) - ) / 2 - - -class ReducedLinearModelUniformPrior(object): - """A model with no data. - - Gaussian likelihood in the parameters - - logL(θ) = logLmax - (θ - mu_L)' Sigma_L^{-1} (θ - mu_L) - - Uniform prior - - We can link this to a data-based model with the relations: - - Sigma_L = (M' C^{-1} M)^{-1} - mu_L = Sigma_L M' C^{-1} (D-m) - logLmax = - -log|2 pi C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 - - Parameters - ---------- - mu_L : array_like - Likelihood peak - Sigma_L : array_like - Likelihood covariance - logLmax : float, optional - Likelihood maximum, defaults to zero - logV : float, optional - log prior volume, defaults to zero - """ - - def __init__(self, *args, **kwargs): - self.mu_L = np.atleast_1d(kwargs.pop("mu_L")) - self.Sigma_L = np.atleast_2d(kwargs.pop("Sigma_L")) - self.logLmax = kwargs.pop("logLmax", 0) - self.logV = kwargs.pop("logV", 0) - self.Sigma_P = self.Sigma_L - self.mu_P = self.mu_L - - def posterior(self): - """P(θ|D) as a distribution object.""" - return multivariate_normal( - self.μ_L, self.Σ_L, self.shape, self.n, self.diagonal - ) - - def logpi(self, θ): - """P(θ) as a scalar.""" - return -self.logV - - def logP(self, θ): - """P(θ|D) as a scalar.""" - return self.posterior().logpdf(θ) - - def logL(self, θ): - """P(D|θ) as a scalar.""" - dist = multivariate_normal( - self.μ_L, self.Σ_L, self.shape, self.n, self.diagonal - ) - return self.logLmax + dist.logpdf(θ) - dist.logpdf(dist.μ_L) - - def logZ(self): - """P(D) as a scalar.""" - return self.logLmax + logdet(2 * np.pi * self.Sigma_P) / 2 - self.logV - - def DKL(self): - """D_KL(P(θ|D)||P(θ)) the Kullback-Leibler divergence.""" - dkl = np.ones(self.shape) * self.logV - dkl -= logdet(2 * np.pi * np.e * self.Σ_L, self.diagonal) / 2 - return dkl - - -class ReducedMixtureModel(ReducedLinearModel): - """A model with no data. 
- - Gaussian likelihood in the parameters - - logL(θ) = logLmax - (θ - μ_L)' Σ_L^{-1} (θ - μ_L) - - We can link this to a data-based model with the relations: - - Σ_L = (M' C^{-1} M)^{-1} - μ_L = Σ_L M' C^{-1} (D-m) - logLmax = - - log|2 π C|/2 - (D-m)'C^{-1}(C - M (M' C^{-1} M)^{-1} M' )C^{-1}(D-m)/2 - - Parameters - ---------- - μ_L : array_like - Likelihood peak - Σ_L : array_like - Likelihood covariance - logLmax : float, optional - Likelihood maximum, defaults to zero - logw : array_like, optional - log mixture weights - if ndim>=1: log mixture weights - if scalar: scalar * unit vector - Defaults to uniform weights - logw_L: array_like, optional - log mixture weights for the likelihood - if ndim>=1: log mixture weights - if scalar: scalar * unit vector - Defaults to uniform weights - n : int, optional - Number of parameters, defaults to automatically inferred value - """ - - def __init__(self, *args, **kwargs): - self.logw = kwargs.pop("logw", 0) - self.logw_L = kwargs.pop("logw_L", 0) - super().__init__(*args, **kwargs) - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes( - np.shape(self.logw), np.shape(self.logw_L), super().shape - ) - - @property - def k(self): - """Number of mixture components of the distribution.""" - return self.shape[-1] - - def prior(self): - """P(θ) as a scipy distribution object.""" - dist = super().prior() - dist.__class__ = mixture_normal - dist.logw = self.logw - return dist - - def posterior(self): - """P(θ|D) as a scipy distribution object.""" - dist = super().posterior() - dist.__class__ = mixture_normal - dist.logw = dist.logw + self.logw_L - return dist - - def logL(self, θ): - """P(D|θ) as a scalar.""" - dist = super().likelihood(θ) - dist.__class__ = mixture - dist.logw = self.logw - pass - - -class ReducedMixtureModelUniformPrior(ReducedLinearModelUniformPrior): - """Fill in docstring.""" - - def __init__(self, *args, **kwargs): - self.logw_L = kwargs.pop("logw_L", 0) - super().__init__(*args, **kwargs) - - @property - def shape(self): - """Shape of the distribution.""" - return np.broadcast_shapes(np.shape(self.logw_L), super().shape) - - @property - def k(self): - """Number of mixture components of the distribution.""" - return self.shape[-1] - - def posterior(self): - """P(θ|D) as a scipy distribution object.""" - dist = super().posterior() - dist.__class__ = mixture_normal - dist.logw = self.logw_L - return dist - - def logπ(self, θ): - """P(θ) as a scalar.""" - return -self.logV - - def logP(self, θ): - """P(θ|D) as a scalar.""" - return self.posterior().logpdf(θ) - - def logL(self, θ): - """P(D|θ) as a scalar.""" - pass - - def logZ(self): - """To be implemented.""" - pass - - def dkl(self): - """To be implemented.""" - pass From 7b26ed0bbdda9d0139f619fdd727c00ce9492eb4 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 15 Sep 2024 16:30:53 +0100 Subject: [PATCH 117/117] Updates post imperial talk These changes allowed slides from these talks to be created: - [PhyStat](https://github.com/williamjameshandley/talks/tree/imperial_2024) - [cosmoverse](https://github.com/williamjameshandley/talks/tree/cosmoverse_2024) --- lsbi/plot.py | 50 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/lsbi/plot.py b/lsbi/plot.py index 6e5b35c..1dc65cf 100644 --- a/lsbi/plot.py +++ b/lsbi/plot.py @@ -19,6 +19,16 @@ match_contour_to_contourf, ) from matplotlib.colors import LinearSegmentedColormap +from matplotlib.tri import Triangulation + + +def 
_Triangulation(x, y): + if len(x.shape) == 1: + return Triangulation(x, y).triangles + else: + return np.array( + [_Triangulation(x[i], y[i]) for i in range(x.shape[0])], dtype=object + ) def pdf_plot_1d(ax, dist, *args, **kwargs): @@ -56,7 +66,7 @@ def pdf_plot_1d(ax, dist, *args, **kwargs): :meth:`matplotlib.axes.Axes.plot` command). """ kwargs = normalize_kwargs(kwargs) - nplot = kwargs.get("nplot_1d", 10000) + nplot = kwargs.pop("nplot_1d", 10000) levels = kwargs.pop("levels", [0.95, 0.68]) density = kwargs.pop("density", False) @@ -74,6 +84,8 @@ def pdf_plot_1d(ax, dist, *args, **kwargs): else: edgecolor = color + orientation = kwargs.pop("orientation", "horizontal") + x = dist.rvs(nplot) logpdf = dist.logpdf(x) logpdfmin = np.sort(logpdf)[::-1][int(0.997 * nplot)] @@ -85,6 +97,9 @@ def pdf_plot_1d(ax, dist, *args, **kwargs): if not density: logpdf -= np.nanmax(logpdf) pdf = np.exp(logpdf) + if orientation == "vertical": + x, pdf = pdf, x + ans = ax.plot(x, pdf, color=color, *args, **kwargs) if facecolor and facecolor not in [None, "None", "none"]: @@ -103,10 +118,16 @@ def pdf_plot_1d(ax, dist, *args, **kwargs): ans = ans, fill - if density: - ax.set_ylim(bottom=0) + if orientation == "vertical": + if density: + ax.set_xlim(bottom=0) + else: + ax.set_xlim(0, 1.1) else: - ax.set_ylim(0, 1.1) + if density: + ax.set_ylim(bottom=0) + else: + ax.set_ylim(0, 1.1) return ans @@ -174,12 +195,28 @@ def pdf_plot_2d(ax, dist, *args, **kwargs): levels = iso_probability_contours_from_samples(P, contours=levels) y = np.atleast_1d(x[..., 1]) x = np.atleast_1d(x[..., 0]) + x_ = x - np.mean(x, axis=-1, keepdims=True) + y_ = y - np.mean(y, axis=-1, keepdims=True) + cov = np.moveaxis( + [ + [np.mean(x_ * x_, axis=-1), np.mean(x_ * y_, axis=-1)], + [np.mean(y_ * x_, axis=-1), np.mean(y_ * y_, axis=-1)], + ], + [0, 1], + [-2, -1], + ) + + L = np.linalg.cholesky(cov) + Linv = np.linalg.inv(L) + xy_ = np.einsum("...ij,...kj->...ki", Linv, np.moveaxis([x_, y_], 0, -1)) + tri = _Triangulation(xy_[..., 0], xy_[..., 1]) if facecolor not in [None, "None", "none"]: linewidths = kwargs.pop("linewidths", 0.5) contf = ax.tricontourf( x, y, + tri, P, levels=levels, cmap=cmap, @@ -207,6 +244,7 @@ def pdf_plot_2d(ax, dist, *args, **kwargs): cont = ax.tricontour( x, y, + tri, P, levels=levels, zorder=zorder, @@ -335,8 +373,8 @@ def plot_2d(dist, axes=None, *args, **kwargs): axes = list(range(dist.dim)) if not isinstance(axes, AxesDataFrame): fig, axes = make_2d_axes(axes) - for y, row in axes.iterrows(): - for x, ax in row.items(): + for y, (_, row) in enumerate(axes.iterrows()): + for x, (_, ax) in enumerate(row.items()): if ax.position == "diagonal": pdf_plot_1d(ax.twin, dist[[x]], *args, **kwargs) elif ax.position == "lower":