diff --git a/Project.toml b/Project.toml
index 0a381fe..a1a2bf0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "GPLikelihoods"
 uuid = "6031954c-0455-49d7-b3b9-3e1c99afaf40"
 authors = ["JuliaGaussianProcesses Team"]
-version = "0.4.7"
+version = "0.4.8"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
diff --git a/src/expectations.jl b/src/expectations.jl
index 3354548..e204671 100644
--- a/src/expectations.jl
+++ b/src/expectations.jl
@@ -31,29 +31,18 @@ default_expectation_method(_) = GaussHermiteExpectation(20)
         y::AbstractVector,
     )
 
-This function computes the expected log likelihood:
-
-```math
-    ∫ q(f) log p(y | f) df
-```
-where `p(y | f)` is the process likelihood. This is described by `lik`, which should be a
-callable that takes `f` as input and returns a Distribution over `y` that supports
-`loglikelihood(lik(f), y)`.
-
-`q(f)` is an approximation to the latent function values `f` given by:
+This function computes the sum of the expected log likelihoods:
 
 ```math
-    q(f) = ∫ p(f | u) q(u) du
+    ∑ᵢ ∫ qᵢ(f) log pᵢ(yᵢ | f) df
 ```
-where `q(u)` is the variational distribution over inducing points.
-The marginal distributions of `q(f)` are given by `q_f`.
+where `pᵢ(yᵢ | f)` is the process likelihood and `yᵢ` the observation at location/site `i`.
+The argument `q_f` is the vector of normal distributions `qᵢ(f)`, and the argument `y` is
+the vector of observations `yᵢ`. The argument `lik` is one of the following:
+- A callable that takes `f` as input and returns a Distribution over `y` that supports `loglikelihood(lik(f), y)`. This corresponds to the case where `pᵢ` is independent of `i`.
+- A vector of such callables. Here, each element of the vector is the corresponding `pᵢ`.
 
 `quadrature` determines which method is used to calculate the expected log likelihood.
-
-# Extended help
-
-`q(f)` is assumed to be an `MvNormal` distribution and `p(y | f)` is assumed to
-have independent marginals such that only the marginals of `q(f)` are required.
 """
 expected_loglikelihood(quadrature, lik, q_f, y)
 
@@ -76,12 +65,16 @@ function expected_loglikelihood(
     mc::MonteCarloExpectation, lik, q_f::AbstractVector{<:Normal}, y::AbstractVector
 )
     # take `n_samples` reparameterised samples
-    f_μ = mean.(q_f)
-    fs = f_μ .+ std.(q_f) .* randn(eltype(f_μ), length(q_f), mc.n_samples)
-    lls = loglikelihood.(lik.(fs), y)
+    r = randn(typeof(mean(first(q_f))), length(q_f), mc.n_samples)
+    lls = _mc_exp_loglikelihood_kernel.(_maybe_ref(lik), q_f, y, r)
     return sum(lls) / mc.n_samples
 end
 
+function _mc_exp_loglikelihood_kernel(lik, q_f, y, r)
+    f = mean(q_f) + std(q_f) * r
+    return loglikelihood(lik(f), y)
+end
+
 # Compute the expected_loglikelihood over a collection of observations and marginal distributions
 function expected_loglikelihood(
     gh::GaussHermiteExpectation, lik, q_f::AbstractVector{<:Normal}, y::AbstractVector
@@ -94,10 +87,14 @@ function expected_loglikelihood(
     # type stable. Compared to other type stable implementations, e.g.
     # using a custom two-argument pairwise sum, this is faster to
     # differentiate using Zygote.
-    A = loglikelihood.(lik.(sqrt2 .* std.(q_f) .* gh.xs' .+ mean.(q_f)), y) .* gh.ws'
+    A = _gh_exp_loglikelihood_kernel.(_maybe_ref(lik), q_f, y, gh.xs', gh.ws')
     return invsqrtπ * sum(A)
 end
 
+function _gh_exp_loglikelihood_kernel(lik, q_f, y, x, w)
+    return loglikelihood(lik(sqrt2 * std(q_f) * x + mean(q_f)), y) * w
+end
+
 function expected_loglikelihood(
     ::AnalyticExpectation, lik, q_f::AbstractVector{<:Normal}, y::AbstractVector
 )
@@ -105,3 +102,6 @@ function expected_loglikelihood(
         "No analytic solution exists for $(typeof(lik)). Use `DefaultExpectationMethod`, `GaussHermiteExpectation` or `MonteCarloExpectation` instead.",
     )
 end
+
+_maybe_ref(lik) = Ref(lik)
+_maybe_ref(liks::AbstractArray) = liks
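The `_maybe_ref` helper added above leans on a standard Julia broadcasting rule: a `Ref`-wrapped value participates in a broadcast as a single scalar element, while an array broadcasts elementwise. Below is a minimal self-contained sketch of that device; `kernel`, `shared`, and `per_site` are illustrative names and data, not package code.

```julia
using Distributions

# Evaluate one site's log likelihood; `lik` maps a latent value f to a
# distribution over the observation y.
kernel(lik, f, y) = logpdf(lik(f), y)

f = [0.1, -0.3, 0.7]
y = [1.2, 0.4, 0.9]

# One likelihood shared by all sites: `Ref` makes it broadcast as a scalar.
shared = f -> Normal(f, 1.0)
lls_shared = kernel.(Ref(shared), f, y)

# One likelihood per site: the vector broadcasts elementwise alongside f and y.
per_site = [f -> Normal(f, σ) for σ in (0.5, 1.0, 2.0)]
lls_per_site = kernel.(per_site, f, y)
```

This is what lets the `MonteCarloExpectation` and `GaussHermiteExpectation` methods above serve both the shared-likelihood and the per-site-likelihood cases with a single broadcast expression.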
diff --git a/src/likelihoods/exponential.jl b/src/likelihoods/exponential.jl
index 9ab648e..99ff3d1 100644
--- a/src/likelihoods/exponential.jl
+++ b/src/likelihoods/exponential.jl
@@ -23,8 +23,18 @@ function expected_loglikelihood(
     q_f::AbstractVector{<:Normal},
     y::AbstractVector{<:Real},
 )
-    f_μ = mean.(q_f)
-    return sum(-f_μ - y .* exp.((var.(q_f) / 2) .- f_μ))
+    return sum(_exp_exp_loglikelihood_kernel.(q_f, y))
 end
 
+function expected_loglikelihood(
+    ::AnalyticExpectation,
+    ::AbstractVector{<:ExponentialLikelihood{ExpLink}},
+    q_f::AbstractVector{<:Normal},
+    y::AbstractVector{<:Real},
+)
+    return sum(_exp_exp_loglikelihood_kernel.(q_f, y))
+end
+
+_exp_exp_loglikelihood_kernel(q_f, y) = -mean(q_f) - y * exp((var(q_f) / 2) - mean(q_f))
+
 default_expectation_method(::ExponentialLikelihood{ExpLink}) = AnalyticExpectation()
diff --git a/src/likelihoods/gamma.jl b/src/likelihoods/gamma.jl
index 61bd4cb..bdb66a2 100644
--- a/src/likelihoods/gamma.jl
+++ b/src/likelihoods/gamma.jl
@@ -28,11 +28,21 @@ function expected_loglikelihood(
     q_f::AbstractVector{<:Normal},
     y::AbstractVector{<:Real},
 )
-    f_μ = mean.(q_f)
-    return sum(
-        (lik.α - 1) * log.(y) .- y .* exp.((var.(q_f) / 2) .- f_μ) .- lik.α * f_μ .-
-        loggamma(lik.α),
-    )
+    return sum(_gamma_exp_loglikelihood_kernel.(lik.α, q_f, y))
+end
+
+function expected_loglikelihood(
+    ::AnalyticExpectation,
+    liks::AbstractVector{<:GammaLikelihood{ExpLink}},
+    q_f::AbstractVector{<:Normal},
+    y::AbstractVector{<:Real},
+)
+    return sum(_gamma_exp_loglikelihood_kernel.(getfield.(liks, :α), q_f, y))
+end
+
+function _gamma_exp_loglikelihood_kernel(α, q_f, y)
+    return (α - 1) * log(y) - y * exp((var(q_f) / 2) - mean(q_f)) - α * mean(q_f) -
+           loggamma(α)
 end
 
 default_expectation_method(::GammaLikelihood{ExpLink}) = AnalyticExpectation()
diff --git a/src/likelihoods/gaussian.jl b/src/likelihoods/gaussian.jl
index ca5101b..c8c2a75 100644
--- a/src/likelihoods/gaussian.jl
+++ b/src/likelihoods/gaussian.jl
@@ -29,9 +29,20 @@ function expected_loglikelihood(
     q_f::AbstractVector{<:Normal},
     y::AbstractVector{<:Real},
 )
-    return sum(
-        -0.5 * (log(2π) .+ log.(lik.σ²) .+ ((y .- mean.(q_f)) .^ 2 .+ var.(q_f)) / lik.σ²)
-    )
+    return sum(_gaussian_exp_loglikelihood_kernel.(lik.σ², q_f, y))
+end
+
+function expected_loglikelihood(
+    ::AnalyticExpectation,
+    liks::AbstractVector{<:GaussianLikelihood},
+    q_f::AbstractVector{<:Normal},
+    y::AbstractVector{<:Real},
+)
+    return sum(_gaussian_exp_loglikelihood_kernel.(only.(getfield.(liks, :σ²)), q_f, y))
+end
+
+function _gaussian_exp_loglikelihood_kernel(σ², q_f, y)
+    return -0.5 * (log(2π) + log(σ²) + ((y - mean(q_f))^2 + var(q_f)) / σ²)
 end
 
 default_expectation_method(::GaussianLikelihood) = AnalyticExpectation()
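Each of the analytic kernels above integrates the log likelihood against a normal marginal in closed form. For the Gaussian case, the identity E[(y − f)²] = (y − μ)² + v for f ~ N(μ, v) gives E[log N(y; f, σ²)] = −½(log 2π + log σ² + ((y − μ)² + v)/σ²), which is exactly `_gaussian_exp_loglikelihood_kernel`. A quick stand-alone numerical check of that identity; the grid width, resolution, and tolerance here are assumptions for illustration, not part of the package or its tests.

```julia
using Distributions

μ, v, σ², y = 0.3, 0.8, 1.5, 1.1
closed_form = -0.5 * (log(2π) + log(σ²) + ((y - μ)^2 + v) / σ²)

# Brute-force the integral ∫ N(f; μ, v) log N(y; f, σ²) df on a wide, fine grid.
fs = range(μ - 10 * sqrt(v), μ + 10 * sqrt(v); length=200_000)
integrand = pdf.(Normal(μ, sqrt(v)), fs) .* logpdf.(Normal.(fs, sqrt(σ²)), y)
numeric = sum(integrand) * step(fs)

@assert isapprox(closed_form, numeric; rtol=1e-6)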
diff --git a/src/likelihoods/poisson.jl b/src/likelihoods/poisson.jl
index 300f120..2ef0a06 100644
--- a/src/likelihoods/poisson.jl
+++ b/src/likelihoods/poisson.jl
@@ -26,8 +26,20 @@ function expected_loglikelihood(
     q_f::AbstractVector{<:Normal},
     y::AbstractVector{<:Real},
 )
-    f_μ = mean.(q_f)
-    return sum((y .* f_μ) - exp.(f_μ .+ (var.(q_f) / 2)) - loggamma.(y .+ 1))
+    return sum(_poisson_exp_loglikelihood_kernel.(q_f, y))
+end
+
+function expected_loglikelihood(
+    ::AnalyticExpectation,
+    ::AbstractArray{<:PoissonLikelihood{ExpLink}},
+    q_f::AbstractVector{<:Normal},
+    y::AbstractVector{<:Real},
+)
+    return sum(_poisson_exp_loglikelihood_kernel.(q_f, y))
+end
+
+function _poisson_exp_loglikelihood_kernel(q_f, y)
+    return (y * mean(q_f)) - exp(mean(q_f) + (var(q_f) / 2)) - loggamma(y + 1)
 end
 
 default_expectation_method(::PoissonLikelihood{ExpLink}) = AnalyticExpectation()
diff --git a/test/expectations.jl b/test/expectations.jl
index 9063e7d..dccf359 100644
--- a/test/expectations.jl
+++ b/test/expectations.jl
@@ -24,6 +24,7 @@
         m.lik for m in implementation_types if
         m.quadrature == GPLikelihoods.AnalyticExpectation && m.lik != Any
     ]
+    filter!(x -> !(x <: AbstractArray), analytic_likelihoods)
     for lik_type in analytic_likelihoods
         lik_type_instances = filter(lik -> isa(lik, lik_type), likelihoods_to_test)
         @test !isempty(lik_type_instances)
@@ -120,4 +121,27 @@
         )
         @test isfinite(glogα)
     end
+
+    @testset "non-constant likelihood" begin
+        @testset "$(nameof(typeof(lik)))" for lik in likelihoods_to_test
+            liks = fill(lik, 10)
+            # Test that the various methods of computing expectations return the same
+            # result.
+            methods = [
+                GaussHermiteExpectation(100),
+                MonteCarloExpectation(1e7),
+                GPLikelihoods.DefaultExpectationMethod(),
+            ]
+            def = GPLikelihoods.default_expectation_method(lik)
+            if def isa GPLikelihoods.AnalyticExpectation
+                push!(methods, def)
+            end
+            y = [rand(rng, lik(0.0)) for lik in liks]
+
+            results = map(
+                m -> GPLikelihoods.expected_loglikelihood(m, liks, q_f, y), methods
+            )
+            @test all(x -> isapprox(x, results[end]; atol=1e-6, rtol=1e-3), results)
+        end
+    end
 end
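For reference, a sketch of how the vector-of-likelihoods methods added in this diff are intended to be called, based on the signatures above and the constructors used elsewhere in the package tests; the marginals and noise levels below are made up for illustration.

```julia
using GPLikelihoods, Distributions

# Marginals qᵢ(f) of the approximate posterior, and per-site observations.
q_f = [Normal(0.0, 1.0), Normal(0.5, 0.8), Normal(-0.2, 1.2)]
y = [0.3, -0.1, 0.7]

# One GaussianLikelihood per site, e.g. heteroscedastic observation noise.
liks = [GaussianLikelihood(σ²) for σ² in (0.1, 0.5, 1.0)]

# A shared likelihood (existing behaviour) and per-site likelihoods (new in
# this diff) go through the same entry point.
ell_shared = GPLikelihoods.expected_loglikelihood(
    GaussHermiteExpectation(20), GaussianLikelihood(0.5), q_f, y
)
ell_per_site = GPLikelihoods.expected_loglikelihood(
    GaussHermiteExpectation(20), liks, q_f, y
)
```

Passing a single likelihood keeps the old behaviour; passing a vector with one likelihood per observation dispatches to the new per-site methods.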