
Release 3.2.2
* Performance improvements
* More tests
ojwoodford authored Sep 23, 2023
1 parent d268085 commit ced2471
Showing 8 changed files with 81 additions and 84 deletions.
4 changes: 1 addition & 3 deletions Project.toml
@@ -1,7 +1,7 @@
name = "NLLSsolver"
uuid = "50b5cf9e-bf7a-4e29-8981-4280cbba70cb"
authors = ["Oliver Woodford"]
version = "3.2.1"
version = "3.2.2"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -12,7 +12,6 @@ IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
LDLFactorizations = "40e66cde-538c-5869-a4ad-c39174c6795b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
@@ -25,7 +24,6 @@ HybridArrays = "0.4"
IfElse = "0.1.1"
LDLFactorizations = "0.10"
LoopVectorization = "0.12.165"
OrderedCollections = "1.6.2"
Static = "0.8.7"
StaticArrays = "1"
VisualGeometryDatasets = "0.2.1"
28 changes: 14 additions & 14 deletions src/VectorRepo.jl
@@ -1,9 +1,8 @@
using OrderedCollections

struct VectorRepo{T}
data::OrderedDict{DataType, Vector}
data::Dict{DataType, Vector}
function VectorRepo{T}() where T
return new{T}(OrderedDict{DataType, Vector}())
return new{T}(Dict{DataType, Vector}())
end
end
VectorRepo() = VectorRepo{Any}()
@@ -70,27 +69,28 @@ end
@inline vrsum(fun, vr::VectorRepo, init, T::DataType) = sum(fun, get(vr, T); init=init)

# Sum reduction over a subset of the elements
@inline sumsubset(fun, subsetfun, vr::VectorRepo{Any}) = sum(fixallbutlast(sumsubset, fun, subsetfun), values(vr.data); init=0.0)
@inline sumsubset(fun, subsetfun, vr::VectorRepo{T}) where T = sumsubset(fun, subsetfun, vr, T)
@inline sumsubset(fun, subsetfun, vr::VectorRepo, T::Union) = sumsubset(fun, subsetfun, vr, T.a) + sumsubset(fun, subsetfun, vr, T.b)
@inline sumsubset(fun, subsetfun, vr::VectorRepo, T::DataType) = sumsubset(fun, subsetfun, get(vr, T))
@inline sumsubset(fun, subsetfun::Function, vector::Vector) = sumsubset(fun, subsetfun(vector), vector)
function sumsubset(fun, subset::Union{BitVector, Vector{Bool}}, vector::Vector)::Float64
sumsubset(fun, subsetfun, vr::VectorRepo{Any}) = sum(fixallbutlast(sumsubsetvec, fun, subsetfun), values(vr.data); init=0.0)
sumsubset(fun, subsetfun, vr::VectorRepo{T}) where T = sumsubsettype(fun, subsetfun, vr, T)
@inline sumsubsettype(fun, subsetfun, vr, T::Union) = sumsubsettype(fun, subsetfun, vr, T.a) + sumsubsettype(fun, subsetfun, vr, T.b)
@inline sumsubsettype(fun, subsetfun, vr, T::DataType) = sumsubsetparamtype(fun, subsetfun, vr, T)
@inline sumsubsetparamtype(fun, subsetfun, vr, ::Type{T}) where T = sumsubsetvec(fun, subsetfun, get(vr, T)::Vector{T})
@inline sumsubsetvec(fun, subsetfun, vector) = sumsubsetloop(fun, subsetfun(vector), vector)
function sumsubsetloop(fun, subset::Union{BitVector, Vector{Bool}}, vector)::Float64
total = 0.0
for (ind, val) in enumerate(subset)
if val
total += fun(vector[ind])
@fastmath total += @inbounds fun(vector[ind])::Float64
end
end
return total
end
function sumsubset(fun, subset, vector::Vector)::Float64
function sumsubsetloop(fun, subset, vector)::Float64
total = 0.0
for ind in subset
total += fun(vector[ind])
@simd for ind in subset
@fastmath total += @inbounds fun(vector[ind])::Float64
end
return total
end

# Map
Base.map(fun, vr::VectorRepo) = OrderedDict([eltype(val)=>fun(val) for val in values(vr)]...)
Base.map(fun, vr::VectorRepo) = Dict([eltype(val)=>fun(val) for val in values(vr)]...)
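
The repo's data field is a Dict{DataType, Vector}, so a plain lookup returns an abstractly typed Vector, and the old single-name sumsubset chain left the summation loop dynamically dispatched. The renamed helpers act as a function barrier: sumsubsetparamtype asserts get(vr, T)::Vector{T}, handing sumsubsetvec a concrete type. A minimal standalone sketch of that pattern (hypothetical names, not part of the package):

using Test

data = Dict{DataType, Vector}(Float64 => [1.0, 2.0, 3.0], Int => [4, 5])

# Abstract lookup: data[T] isa Vector, so sum cannot specialize.
sumslow(T) = sum(data[T])

# Function barrier: the type assertion recovers Vector{T}, so the
# inner sum compiles to a tight loop over a concretely typed array.
sumfast(::Type{T}) where T = sum(data[T]::Vector{T})

@test sumslow(Float64) == sumfast(Float64) == 6.0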
12 changes: 6 additions & 6 deletions src/linearsystem.jl
@@ -140,8 +140,8 @@ end

function updatesymlinearsystem!(linsystem::UniVariateLS, g, H, unusedargs...)
# Update the blocks in the problem
linsystem.b .+= g
linsystem.A .+= H
@fastmath linsystem.b .+= g
@fastmath linsystem.A .+= H
end

function updatesymA!(A, a, vars, varflags, blockindices)
@@ -152,7 +152,7 @@ function updatesymA!(A, a, vars, varflags, blockindices)
nvi = nvars(vars[i])
rangei = SR(1, nvi) .+ loffseti
loffseti += nvi
@inbounds block(A, blockindices[i], blockindices[i], nvi, nvi) .+= view(a, rangei, rangei)
@fastmath @inbounds block(A, blockindices[i], blockindices[i], nvi, nvi) .+= view(a, rangei, rangei)

loffsetj = static(0)
@unroll for j in 1:i-1
@@ -161,9 +161,9 @@
rangej = SR(1, nvj) .+ loffsetj
loffsetj += nvj
if @inbounds blockindices[i] >= blockindices[j] # Make sure the block matrix is lower triangular
@inbounds block(A, blockindices[i], blockindices[j], nvi, nvj) .+= view(a, rangei, rangej)
@fastmath @inbounds block(A, blockindices[i], blockindices[j], nvi, nvj) .+= view(a, rangei, rangej)
else
@inbounds block(A, blockindices[j], blockindices[i], nvj, nvi) .+= view(a, rangej, rangei)
@fastmath @inbounds block(A, blockindices[j], blockindices[i], nvj, nvi) .+= view(a, rangej, rangei)
end
end
end
@@ -178,7 +178,7 @@ function updateb!(B, b, vars, varflags, boffsets, blockindices)
if i <= length(vars) && bitiset(varflags, i)
nv = nvars(vars[i])
range = SR(0, nv-1)
@inbounds view(B, range .+ boffsets[blockindices[i]]) .+= view(b, range .+ loffset)
@fastmath @inbounds view(B, range .+ boffsets[blockindices[i]]) .+= view(b, range .+ loffset)
loffset += nv
end
end
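
The only change in this file is wrapping the in-place block accumulations in @fastmath, which permits reassociation and FMA contraction of the floating-point additions. A standalone sketch of the pattern on a plain matrix (illustrative shapes, not the package's block layout):

A = zeros(6, 6)          # stands in for the Hessian block matrix
a = rand(3, 3)           # one cost term's local contribution
rows, cols = 1:3, 4:6    # destination block
@fastmath @inbounds view(A, rows, cols) .+= a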
5 changes: 3 additions & 2 deletions src/problem.jl
@@ -160,11 +160,11 @@ function getvarcostmap(problem::NLLSProblem)
return problem.varcostmap
end

function reordercostsforschur(problem::NLLSProblem, schurvars)
function reordercostsforschur!(problem::NLLSProblem, schurvars)
# Group residuals by Schur variable (i.e. variables to be factored out)
# Get the cost index per Schur variable (0 if none)
schurruns = Dict{DataType, Vector{Int}}()
costvarmap = sparse(view(getvarcostmap(problem), schurvars, :)')
costvarmap = sparse(getvarcostmap(problem)')[:,schurvars]
schurvarpercost = sum(costvarmap; dims=2)
@assert all(x->(x<=1), schurvarpercost) "Each cost block can only depend on one schur variable at most"
schurvarpercost[schurvarpercost .> 0] = costvarmap.rowval
@@ -180,6 +180,7 @@ function reordercostsforschur(problem::NLLSProblem, schurvars)
permute!(costs, sortedorder)
end
end
problem.varcostmapvalid = false
return schurruns
end

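
Two changes here: the function gains a ! because it permutes problem.costs in place, and it now flags the cached variable-to-cost map as stale (problem.varcostmapvalid = false) so getvarcostmap rebuilds it after the reorder. A minimal sketch of that invalidate-on-mutate pattern (hypothetical struct, not the package API):

mutable struct CachedMap
    map::Vector{Int}
    valid::Bool
end

function getmap!(c::CachedMap, recompute::Function)
    if !c.valid              # stale: rebuild on demand
        c.map = recompute()
        c.valid = true
    end
    return c.map
end

c = CachedMap(Int[], false)
getmap!(c, () -> [1, 2, 3])  # computes and caches
c.valid = false              # any in-place reorder invalidates
getmap!(c, () -> [3, 2, 1])  # recomputes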
54 changes: 21 additions & 33 deletions src/residual.jl
@@ -30,52 +30,40 @@ function computerescostgradhess(varflags, residual, kernel, vars)
# Compute the unrobust cost, gradient and Hessian
cost = sqnorm(res)
g = jac' * res
H = jac' * jac

if varflags & 1 == 0
# The kernel is not optimized. Differentiate w.r.t. the cost only
cost, dc, d2c = robustifydcost(kernel, cost)

# IRLS reweighting of Hessian
if dc != 1
H *= dc
end
# Second order correction
if d2c != 0
H += ((2 * d2c) * g) * g'
end
# IRLS reweighting of gradient
if dc != 1
g *= dc
end

# Return the cost
return 0.5 * Float64(cost), g, H
else
# Differentiate w.r.t. the cost and kernel parameters
cost, dc_, d2c_ = robustifydkernel(kernel, cost)
dc = dc_[end]
d2c = d2c_[end,end]

# Compute the d^2/dkernel.dvariables block
kernelvarind = SR(1, nvars(kernel))
@fastmath dkdv = g * view(d2c_, kernelvarind, nvars(kernel)+1)'
end

# Differentiate w.r.t. the cost and kernel parameters
cost, dc, d2c = robustifydkernel(kernel, cost)

# Compute the d^2/dkernel.dvariables block
kernelvarind = SR(1, nvars(kernel))
dkdv = g * view(d2c, kernelvarind, nvars(kernel)+1)'

# IRLS reweighting of Hessian
if dc[end] != 1
H *= dc[end]
H = jac' * jac
if dc != 1
H *= dc
end
# Second order correction
if d2c[end] != 0
H += ((2 * d2c[end]) * g) * g'
if d2c != 0
H += ((2 * d2c) * g) * g'
end
# IRLS reweighting of gradient
if dc[end] != 1
g *= dc[end]
if dc != 1
g *= dc
end

# Add on the kernel derivative blocks
g = vcat(view(dc, kernelvarind), g)
H = hcat(vcat(view(d2c, kernelvarind, kernelvarind), dkdv), vcat(dkdv', H))
if varflags & 1 == 1
# Add on the kernel derivative blocks
g = vcat(view(dc_, kernelvarind), g)
H = hcat(vcat(view(d2c_, kernelvarind, kernelvarind), dkdv), vcat(dkdv', H))
end

# Return the cost and derivatives
return 0.5 * Float64(cost), g, H
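
The rewrite removes the duplicated reweighting: only the kernel-derivative bookkeeping differs between the two branches, so the IRLS reweighting and second-order correction now run once on a shared path. With unrobust cost c = ‖r‖², gradient g = Jᵀr, Gauss-Newton Hessian H = JᵀJ and kernel derivatives dc = ρ′(c), d2c = ρ″(c), the shared update is H ← dc·H + 2·d2c·g·gᵀ (using the unscaled g) followed by g ← dc·g. A standalone sketch in that notation (assumed names, not the package API):

function robustify(g::AbstractVector, H::AbstractMatrix, dc, d2c)
    if dc != 1
        H = dc * H                       # IRLS reweighting
    end
    if d2c != 0
        H = H + (2 * d2c) * (g * g')     # second-order correction, unscaled g
    end
    if dc != 1
        g = dc * g                       # reweight the gradient last
    end
    return g, H
end

g, H = robustify([1.0, 2.0], [2.0 0.0; 0.0 2.0], 0.5, -0.1)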
10 changes: 8 additions & 2 deletions src/utils.jl
@@ -26,7 +26,14 @@ bitiset(flags, bit) = (1 << (bit - 1)) & flags != 0
@inline uniontotuple(T::Union) = (uniontotuple(T.a)..., uniontotuple(T.b)...)
@inline uniontotuple(T::DataType) = (T,)

@inline sqnorm(x) = (x' * x)
sqnorm(x::Number) = @fastmath x * x
function sqnorm(vec::AbstractVector)
total = zero(eltype(vec))
@turbo for i in eachindex(vec)
total += vec[i] * vec[i]
end
return total
end

function runlengthencodesortedints(sortedints)
runindices = Vector{Int}(undef, sortedints[end]+2)
@@ -97,4 +104,3 @@ function fast_bAb(A::SparseMatrixCSC, b::Vector)
end
return total
end
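
sqnorm is split into two methods: a trivial scalar case and, for vectors, an explicit SIMD reduction via LoopVectorization's @turbo in place of the old generic x' * x. A quick standalone check that the new definition agrees with the old one (illustrative only):

using LoopVectorization, Test

sqnorm_new(x::Number) = @fastmath x * x
function sqnorm_new(vec::AbstractVector)
    total = zero(eltype(vec))
    @turbo for i in eachindex(vec)
        total += vec[i] * vec[i]
    end
    return total
end

v = rand(1000)
@test sqnorm_new(v) ≈ v' * v   # matches the old definition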

20 changes: 10 additions & 10 deletions test/VectorRepo.jl
@@ -32,15 +32,15 @@ end
vr1 = NLLSsolver.VectorRepo()
@test sum(i->2i, vr1) == 0.0
fillrepo(vr1, floats, ints)
@test isapprox(sum(i->π*i, vr1), total * π)
@test sum(i->π*i, vr1) ≈ total * π
vec = values(vr1)
@test length(vec) == 2 && any(Base.Fix2(isa, Vector{Float64}), vec) && any(Base.Fix2(isa, Vector{Int}), vec)

# Test subset reductions
@test NLLSsolver.sumsubset(identity, rangefun, vr1) == halftotal
@test NLLSsolver.sumsubset(identity, indicesfun, vr1) == halftotal
@test NLLSsolver.sumsubset(identity, bitvecfun, vr1) == halftotal
@test NLLSsolver.sumsubset(identity, boolvecfun, vr1) == halftotal
@test NLLSsolver.sumsubset(Float64, rangefun, vr1) == halftotal
@test NLLSsolver.sumsubset(Float64, indicesfun, vr1) == halftotal
@test NLLSsolver.sumsubset(Float64, bitvecfun, vr1) == halftotal
@test NLLSsolver.sumsubset(Float64, boolvecfun, vr1) == halftotal

# Union container
vr2 = NLLSsolver.VectorRepo{Union{Float64, Int, Char}}()
@@ -49,14 +49,14 @@
@test any(keytup .== Float64) && any(keytup .== Int) && any(keytup .== Char)
@test sum(i->2i, vr2) == 0.0
fillrepo(vr2, floats, ints)
@test isapprox(sum(i->π*i, vr2), total * π)
@test sum(i->π*i, vr2) ≈ total * π
valuetup = values(vr2)
@test isa(valuetup, Tuple) && length(valuetup) == 3
@test any(Base.Fix2(isa, Vector{Float64}), valuetup) && any(Base.Fix2(isa, Vector{Int}), valuetup) && any(Base.Fix2(isa, Vector{Char}), valuetup)

# Test subset reductions
@test NLLSsolver.sumsubset(identity, rangefun, vr2) == halftotal
@test NLLSsolver.sumsubset(identity, indicesfun, vr2) == halftotal
@test NLLSsolver.sumsubset(identity, bitvecfun, vr2) == halftotal
@test NLLSsolver.sumsubset(identity, boolvecfun, vr2) == halftotal
@test NLLSsolver.sumsubset(Float64, rangefun, vr2) == halftotal
@test NLLSsolver.sumsubset(Float64, indicesfun, vr2) == halftotal
@test NLLSsolver.sumsubset(Float64, bitvecfun, vr2) == halftotal
@test NLLSsolver.sumsubset(Float64, boolvecfun, vr2) == halftotal
end
32 changes: 18 additions & 14 deletions test/optimizeba.jl
@@ -15,26 +15,25 @@ Base.eltype(::ProjError) = Float64

function create_ba_problem(ncameras, nlandmarks, propvisible)
problem = NLLSProblem(Union{EffPose3D{Float64}, Point3D{Float64}}, ProjError)

# Generate the landmarks in a unit cube centered on the origin
for i = 1:nlandmarks
addvariable!(problem, Point3D{Float64}(rand(SVector{3, Float64}) .- 0.5))
end

# Generate the cameras on a unit sphere, pointing to the origin
for i = 1:ncameras
camcenter = normalize(randn(SVector{3, Float64}))
addvariable!(problem, EffPose3D(Rotation3DL(UnitVec3D(-camcenter).v.m), Point3D(camcenter)))
end

# Generate the landmarks in a unit cube centered on the origin
for i = 1:nlandmarks
addvariable!(problem, Point3D{Float64}(rand(SVector{3, Float64}) .- 0.5))
end

# Generate the measurements
visibility = sprand(ncameras, nlandmarks, propvisible)
for landmark = 1:nlandmarks
point = problem.variables[landmark]::Point3D{Float64}
for camera = view(visibility.rowval, visibility.colptr[landmark]:visibility.colptr[landmark+1]-1)
@assert camera <= ncameras
cam = camera + nlandmarks
addcost!(problem, ProjError(project(problem.variables[cam]::EffPose3D{Float64} * point), cam, landmark))
visibility = sprand(nlandmarks, ncameras, propvisible)
for camind = 1:ncameras
camera = problem.variables[camind]::EffPose3D{Float64}
for landmark = view(visibility.rowval, visibility.colptr[camind]:visibility.colptr[camind+1]-1)
landmarkind = landmark + ncameras
addcost!(problem, ProjError(project(camera * problem.variables[landmarkind]::Point3D{Float64}), camind, landmarkind))
end
end

@@ -55,11 +54,16 @@

@testset "optimizeba.jl" begin
# Generate some test data for a dense problem
Random.seed!(1)
Random.seed!(5)
problem = create_ba_problem(10, 100, 0.3)

# Optimize just the landmarks
# Test reordering the costs
problem = perturb_ba_problem(problem, 0.003, 0.0)
costbefore = cost(problem)
NLLSsolver.reordercostsforschur!(problem, isa.(problem.variables, Point3D{Float64}))
@test cost(problem) ≈ costbefore

# Optimize just the landmarks
result = NLLSsolver.optimizesingles!(problem, NLLSOptions(), Point3D{Float64})
@test result.bestcost < 1.e-15

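
create_ba_problem now adds the cameras first and builds visibility as a landmarks × cameras matrix, so the measurement loop walks the sparse matrix camera by camera, i.e. column by column, which is the contiguous direction for Julia's SparseMatrixCSC storage; it also groups each camera's costs together. The idiomatic accessors for that traversal (illustrative sketch, not the test code):

using SparseArrays

function visitcolumns(S::SparseMatrixCSC)
    rows = rowvals(S)
    for cam in 1:size(S, 2)
        for k in nzrange(S, cam)   # contiguous range of column cam
            landmark = rows[k]     # row index: a landmark seen by cam
            # ... add the (cam, landmark) cost here ...
        end
    end
end

visitcolumns(sprand(100, 10, 0.3))   # landmarks × cameras, as in the test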

1 comment on commit ced2471

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/92074

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v3.2.2 -m "<description of version>" ced2471c9225b176de4c800eca34fb903e1bb3f6
git push origin v3.2.2
