Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement scalar rules for Zygote with ChainRules #103

Merged
merged 10 commits into from
Aug 23, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
name = "DistributionsAD"
uuid = "ced4e74d-a319-5a8a-b0ac-84af2272839c"
version = "0.6.3"
version = "0.6.4"

[deps]
ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Expand All @@ -22,6 +23,7 @@ ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444"

[compat]
ChainRules = "0.7"
ChainRulesCore = "0.9.5"
Compat = "3.6"
DiffRules = "0.1, 1.0"
Distributions = "0.23.3"
Expand Down
2 changes: 2 additions & 0 deletions src/DistributionsAD.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ using PDMats,
Requires,
ZygoteRules,
ChainRules, # needed for `ChainRules.chol_blocked_rev`
ChainRulesCore,
FillArrays

using SpecialFunctions: logabsgamma, digamma
Expand Down Expand Up @@ -52,6 +53,7 @@ include("flatten.jl")
include("arraydist.jl")
include("filldist.jl")

include("chainrules.jl")
include("zygote.jl")

@init begin
Expand Down
94 changes: 94 additions & 0 deletions src/chainrules.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
## Uniform ##

# Reverse-mode rule for `uniformlogpdf(a, b, x)`.
#
# Returns the primal value together with a pullback. The primal is `-log(b - a)`
# when `a <= x <= b` and `log(zero(b - a))` otherwise.
#
# The pullback maps the output cotangent `ȳ` to `(NO_FIELDS, ā, b̄, x̄)`:
# * inside the support: `ā = ȳ / (b - a)`, `b̄ = -ā`, and `x̄` is a zero of the
#   type of `zero(x) * ȳ`;
# * outside the support: all three cotangents are NaN.
function ChainRulesCore.rrule(::typeof(uniformlogpdf), a, b, x)
    diff = b - a
    insupport = a <= x <= b
    lp = insupport ? -log(diff) : log(zero(diff))

    function uniform_logpdf_pullback(ȳ)
        z = zero(x) * ȳ
        if insupport
            # d(-log(b - a))/da = 1 / (b - a); the derivative w.r.t. `b` is its negative.
            c = ȳ / diff
            return NO_FIELDS, c, -c, z
        else
            # Divide by `one(diff)` so `c` has the same (floating-point) type as in
            # the in-support branch; NaN is then created in that type rather than
            # in the type of `ȳ`, which may be an integer.
            c = ȳ / one(diff)
            cNaN = oftype(c, NaN)
            return NO_FIELDS, cNaN, cNaN, oftype(z, NaN)
        end
    end

    return lp, uniform_logpdf_pullback
end

## Beta ##

# Scalar differentiation rule for `betalogpdf(α, β, x)` declared with `@scalar_rule`.
# The tuple lists one partial-derivative expression per argument, in argument
# order (α, β, x); each expression is wrapped in `@thunk`.
@scalar_rule(
betalogpdf(α::Real, β::Real, x::Number),
# `di` is used by both the α and the β partial, so it is computed once here.
@setup(di = digamma(α + β)),
(
@thunk(log(x) - digamma(α) + di),
@thunk(log(1 - x) - digamma(β) + di),
@thunk((α - 1)/x + (1 - β)/(1 - x)),
),
)

## Gamma ##

# Scalar differentiation rule for `gammalogpdf(k, θ, x)` declared with `@scalar_rule`.
# The tuple lists one partial-derivative expression per argument, in argument
# order (k, θ, x); each expression is wrapped in `@thunk`.
@scalar_rule(
gammalogpdf(k::Real, θ::Real, x::Number),
(
@thunk(-digamma(k) - log(θ) + log(x)),
@thunk(-k/θ + x/θ^2),
@thunk((k - 1)/x - 1/θ),
),
)

## Chisq ##

# Scalar differentiation rule for `chisqlogpdf(k, x)` declared with `@scalar_rule`.
# The tuple lists the partials with respect to `k` and `x`, in that order.
# NOTE(review): `logtwo` is not defined in this block; presumably it is the
# constant log(2) provided by an imported package — confirm it is in scope.
@scalar_rule(
chisqlogpdf(k::Real, x::Number),
# `ko2` (k / 2) is shared by both partials.
@setup(ko2 = k / 2),
(@thunk((-logtwo - digamma(ko2) + log(x)) / 2), @thunk((ko2 - 1)/x - one(ko2) / 2)),
)

## FDist ##

# Scalar differentiation rule for `fdistlogpdf(v1, v2, x)` declared with `@scalar_rule`.
# The tuple lists one partial-derivative expression per argument, in argument
# order (v1, v2, x); each expression is wrapped in `@thunk`.
@scalar_rule(
fdistlogpdf(v1::Real, v2::Real, x::Number),
# Intermediate quantities shared between the partials below.
@setup(
temp1 = v1 * x + v2,
temp2 = log(temp1),
vsum = v1 + v2,
temp3 = vsum / temp1,
temp4 = digamma(vsum / 2),
),
(
@thunk((log(v1 * x) + 1 - temp2 - x * temp3 - digamma(v1 / 2) + temp4) / 2),
@thunk((log(v2) + 1 - temp2 - temp3 - digamma(v2 / 2) + temp4) / 2),
@thunk(v1 / 2 * (1 / x - temp3) - 1 / x),
),
)

## TDist ##

# Scalar differentiation rule for `tdistlogpdf(v, x)` declared with `@scalar_rule`.
# The tuple lists the partials with respect to `v` and `x`, in that order;
# each expression is wrapped in `@thunk`.
@scalar_rule(
tdistlogpdf(v::Real, x::Number),
(
@thunk((digamma((v + 1) / 2) - 1 / v - digamma(v / 2) - log(1 + x^2 / v) + x^2 * (v + 1) / v^2 / (1 + x^2 / v)) / 2),
@thunk(-x * (v + 1) / (v + x^2)),
)
)

## Binomial ##

# Scalar differentiation rule for `binomlogpdf(n, p, x)` declared with `@scalar_rule`.
# Only `p` receives a derivative; the integer arguments `n` and `x` are marked
# with `DoesNotExist()`.
@scalar_rule(
    binomlogpdf(n::Int, p::Real, x::Int),
    (DoesNotExist(), @thunk(x / p - (n - x) / (1 - p)), DoesNotExist()),
)

## Poisson ##

# Scalar differentiation rule for `poislogpdf(v, x)` declared with `@scalar_rule`.
# Only `v` receives a derivative (`x / v - 1`); the integer argument `x` is
# marked with `DoesNotExist()`.
@scalar_rule(
poislogpdf(v::Real, x::Int),
(@thunk(x / v - 1), DoesNotExist()),
)
24 changes: 16 additions & 8 deletions src/tracker.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,16 +215,24 @@ uniformlogpdf(a::Real, b::Real, x::TrackedReal) = track(uniformlogpdf, a, b, x)
# Methods for tracked bounds `a` and `b` (with plain or tracked `x`): forward the
# call to `track`. Presumably this lets the `@grad` definition for
# `uniformlogpdf` supply the gradient — confirm against Tracker's API.
uniformlogpdf(a::TrackedReal, b::TrackedReal, x::Real) = track(uniformlogpdf, a, b, x)
uniformlogpdf(a::TrackedReal, b::TrackedReal, x::TrackedReal) = track(uniformlogpdf, a, b, x)
# Tracker gradient definition for `uniformlogpdf(a, b, x)`.
#
# Returns the primal value and a pullback. The primal is `-log(b - a)` when
# `a <= x <= b` and `log(zero(b - a))` otherwise, computed on the untracked data.
#
# The pullback maps the output sensitivity `Δ` to `(ā, b̄, x̄)`:
# * inside the support: `ā = Δ / (b - a)`, `b̄ = -ā`, and `x̄ = zero(x) * Δ`;
# * outside the support: all three sensitivities are NaN.
@grad function uniformlogpdf(a, b, x)
    # compute log pdf
    diff = data(b) - data(a)
    insupport = a <= data(x) <= b
    lp = insupport ? -log(diff) : log(zero(diff))

    function pullback(Δ)
        z = zero(x) * Δ
        if insupport
            # d(-log(b - a))/da = 1 / (b - a); the derivative w.r.t. `b` is its negative.
            c = Δ / diff
            return c, -c, z
        else
            # Divide by `one(diff)` so `c` has the same type as in the in-support
            # branch before NaN is created in that type.
            c = Δ / one(diff)
            cNaN = oftype(c, NaN)
            return cNaN, cNaN, oftype(z, NaN)
        end
    end

    return lp, pullback
end


Expand Down
7 changes: 3 additions & 4 deletions src/univariate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@ Base.minimum(d::TuringUniform) = d.a
# The maximum of a `TuringUniform` is its stored `b` field.
Base.maximum(d::TuringUniform) = d.b

# Log-density of a uniform distribution on `[a, b]` evaluated at `x`.
#
# Returns `-log(b - a)` when `a <= x <= b`. Otherwise returns `log(zero(b - a))`,
# i.e. `-Inf` in the floating-point type that matches the in-support result.
# The logarithm of `b - a` is only taken inside the support, so an out-of-support
# query with `b < a` yields `-Inf` instead of throwing a `DomainError`.
function uniformlogpdf(a, b, x)
    diff = b - a
    if a <= x <= b
        return -log(diff)
    else
        return log(zero(diff))
    end
end


# Compatibility shim: on Julia versions older than 1.2, define `inv` for the
# irrational constant π as `1/π`.
@static if VERSION < v"1.2"
    Base.inv(::Irrational{:π}) = 1/π
end
Expand Down
98 changes: 0 additions & 98 deletions src/zygote.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,108 +8,10 @@ end

## Uniform ##

# Zygote adjoint for `uniformlogpdf(a, b, x)`.
#
# When `a <= x <= b` and `a < b`, returns `-log(b - a)` and a pullback giving
# `(Δ / (b - a), -Δ / (b - a), 0 * Δ)` for `(a, b, x)`. Otherwise both the value
# and all three gradients are NaN of the type of `b - a`.
ZygoteRules.@adjoint function uniformlogpdf(a, b, x)
    diff = b - a
    T = typeof(diff)
    if a <= x <= b && a < b
        l = -log(diff)
        # d(-log(b - a))/da = 1 / (b - a); the derivative w.r.t. `b` is its negative.
        da = 1/diff
        return l, Δ -> (da * Δ, -da * Δ, zero(T) * Δ)
    else
        n = T(NaN)
        return n, Δ -> (n, n, n)
    end
end

# Zygote adjoint for the `Distributions.Uniform` constructor: the value and the
# pullback are those of calling `TuringUniform` with the same arguments.
ZygoteRules.@adjoint function Distributions.Uniform(args...)
return ZygoteRules.pullback(TuringUniform, args...)
end


## Beta ##

# Partial derivatives used by the `betalogpdf` adjoint.
# Returns a 3-tuple ordered as the arguments: (w.r.t. α, w.r.t. β, w.r.t. x).
function _betalogpdfgrad(α, β, x)
    # digamma of the parameter sum appears in both parameter derivatives
    digamma_sum = digamma(α + β)
    grad_α = log(x) - digamma(α) + digamma_sum
    grad_β = log(1 - x) - digamma(β) + digamma_sum
    grad_x = (α - 1)/x + (1 - β)/(1 - x)
    return (grad_α, grad_β, grad_x)
end
# Zygote adjoint for `betalogpdf`: the pullback scales every entry of
# `_betalogpdfgrad(α, β, x)` by the incoming sensitivity `Δ`.
ZygoteRules.@adjoint function betalogpdf(α::Real, β::Real, x::Number)
    value = betalogpdf(α, β, x)
    betalogpdf_pullback(Δ) = Δ .* _betalogpdfgrad(α, β, x)
    return value, betalogpdf_pullback
end


## Gamma ##

# Partial derivatives used by the `gammalogpdf` adjoint.
# Returns a 3-tuple ordered as the arguments: (w.r.t. k, w.r.t. θ, w.r.t. x).
function _gammalogpdfgrad(k, θ, x)
    grad_k = -digamma(k) - log(θ) + log(x)
    grad_θ = -k/θ + x/θ^2
    grad_x = (k - 1)/x - 1/θ
    return (grad_k, grad_θ, grad_x)
end
# Zygote adjoint for `gammalogpdf`: the pullback scales every entry of
# `_gammalogpdfgrad(k, θ, x)` by the incoming sensitivity `Δ`.
ZygoteRules.@adjoint function gammalogpdf(k::Real, θ::Real, x::Number)
    value = gammalogpdf(k, θ, x)
    gammalogpdf_pullback(Δ) = Δ .* _gammalogpdfgrad(k, θ, x)
    return value, gammalogpdf_pullback
end


## Chisq ##

# Partial derivatives used by the `chisqlogpdf` adjoint.
# Returns a 2-tuple ordered as the arguments: (w.r.t. k, w.r.t. x).
function _chisqlogpdfgrad(k, x)
    half_k = k/2
    digamma_half_k = digamma(half_k)
    # log(2) is taken in the type of `half_k`
    grad_k = (-log(oftype(half_k, 2)) - digamma_half_k + log(x))/2
    grad_x = (half_k - 1)/x - one(half_k)/2
    return (grad_k, grad_x)
end
# Zygote adjoint for `chisqlogpdf`: the pullback scales every entry of
# `_chisqlogpdfgrad(k, x)` by the incoming sensitivity `Δ`.
ZygoteRules.@adjoint function chisqlogpdf(k::Real, x::Number)
    value = chisqlogpdf(k, x)
    chisqlogpdf_pullback(Δ) = Δ .* _chisqlogpdfgrad(k, x)
    return value, chisqlogpdf_pullback
end

## FDist ##

# Partial derivatives used by the `fdistlogpdf` adjoint.
# Returns a 3-tuple ordered as the arguments: (w.r.t. v1, w.r.t. v2, w.r.t. x).
function _fdistlogpdfgrad(v1, v2, x)
    # quantities shared between the three derivatives
    denom = v1 * x + v2
    log_denom = log(denom)
    v_total = v1 + v2
    ratio = v_total / denom
    digamma_half_total = digamma(v_total / 2)

    grad_v1 = (log(v1 * x) + 1 - log_denom - x * ratio - digamma(v1 / 2) + digamma_half_total) / 2
    grad_v2 = (log(v2) + 1 - log_denom - ratio - digamma(v2 / 2) + digamma_half_total) / 2
    grad_x = v1 / 2 * (1 / x - ratio) - 1 / x
    return (grad_v1, grad_v2, grad_x)
end
# Zygote adjoint for `fdistlogpdf`: the pullback scales every entry of
# `_fdistlogpdfgrad(v1, v2, x)` by the incoming sensitivity `Δ`.
ZygoteRules.@adjoint function fdistlogpdf(v1::Real, v2::Real, x::Number)
    value = fdistlogpdf(v1, v2, x)
    fdistlogpdf_pullback(Δ) = Δ .* _fdistlogpdfgrad(v1, v2, x)
    return value, fdistlogpdf_pullback
end

## TDist ##

# Partial derivatives used by the `tdistlogpdf` adjoint.
# Returns a 2-tuple ordered as the arguments: (w.r.t. v, w.r.t. x).
function _tdistlogpdfgrad(v, x)
    grad_v = (digamma((v + 1) / 2) - 1 / v - digamma(v / 2) - log(1 + x^2 / v) + x^2 * (v + 1) / v^2 / (1 + x^2 / v)) / 2
    grad_x = -x * (v + 1) / (v + x^2)
    return (grad_v, grad_x)
end
# Zygote adjoint for `tdistlogpdf`: the pullback scales every entry of
# `_tdistlogpdfgrad(v, x)` by the incoming sensitivity `Δ`.
ZygoteRules.@adjoint function tdistlogpdf(v::Real, x::Number)
    value = tdistlogpdf(v, x)
    tdistlogpdf_pullback(Δ) = Δ .* _tdistlogpdfgrad(v, x)
    return value, tdistlogpdf_pullback
end


## Binomial ##

# Zygote adjoint for `binomlogpdf(n, p, x)`. Only `p` receives a gradient,
# `Δ * (x / p - (n - x) / (1 - p))`; the integer arguments `n` and `x` get `nothing`.
ZygoteRules.@adjoint function binomlogpdf(n::Int, p::Real, x::Int)
    value = binomlogpdf(n, p, x)
    function binomlogpdf_pullback(Δ)
        grad_p = Δ * (x / p - (n - x) / (1 - p))
        return (nothing, grad_p, nothing)
    end
    return value, binomlogpdf_pullback
end

## Poisson ##

# Zygote adjoint for `poislogpdf(v, x)`. Only `v` receives a gradient,
# `Δ * (x/v - 1)`; the integer argument `x` gets `nothing`.
ZygoteRules.@adjoint function poislogpdf(v::Real, x::Int)
    value = poislogpdf(v, x)
    function poislogpdf_pullback(Δ)
        grad_v = Δ * (x/v - 1)
        return (grad_v, nothing)
    end
    return value, poislogpdf_pullback
end


## PoissonBinomial ##

# Zygote loads ForwardDiff, so this dummy adjoint should never be needed.
Expand Down