Commit 2fcf7b0

Release 3.1
- Deprecate callback, storecosts and storetrajectory in options
- Move callback out of options to explicit input argument
- Use callback for getting costs/trajectory
ojwoodford authored Sep 20, 2023
1 parent 2ef74d3 commit 2fcf7b0
Showing 17 changed files with 228 additions and 87 deletions.
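In short: the per-iteration callback moves out of NLLSOptions and becomes an explicit fourth positional argument of optimize!, and per-iteration costs and steps are now recorded by a callback writing into a user-owned CostTrajectory rather than onto the result object. A minimal before/after sketch, based on the examples/rosenbrock.jl changes below (the third argument, nothing here, is passed as in the updated example; its meaning is unchanged by this commit):

# Before (3.1.x, now deprecated): storage flags lived in the options,
# and the recorded values were returned on the result
options = NLLSsolver.NLLSOptions(storecosts=true, storetrajectory=true)
result = NLLSsolver.optimize!(problem, options)
costs = result.costs

# After (3.2.0): pass the callback explicitly; it records into the store
ct = NLLSsolver.CostTrajectory()
result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(), nothing, NLLSsolver.storecostscallback(ct))
costs = ct.costs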
4 changes: 3 additions & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "NLLSsolver"
uuid = "50b5cf9e-bf7a-4e29-8981-4280cbba70cb"
authors = ["Oliver Woodford"]
version = "3.1.2"
version = "3.2.0"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -11,6 +11,7 @@ HybridArrays = "1baab800-613f-4b0a-84e4-9cd3431bfbb9"
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
LDLFactorizations = "40e66cde-538c-5869-a4ad-c39174c6795b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
@@ -23,6 +24,7 @@ ForwardDiff = "0.10"
HybridArrays = "0.4"
IfElse = "0.1.1"
LDLFactorizations = "0.10"
+LoopVectorization = "0.12.165"
OrderedCollections = "1.6.2"
Static = "0.8.7"
StaticArrays = "1"
14 changes: 8 additions & 6 deletions examples/rosenbrock.jl
@@ -46,25 +46,27 @@ end
OptimResult() = OptimResult(Observable(Vector{Float64}()), Observable(Vector{Point2f}()))

function runoptimizers!(results, problem, start, iterators)
+costtrajectory = NLLSsolver.CostTrajectory()
for (ind, iter) in enumerate(iterators)
# Set the start
problem.variables[1] = NLLSsolver.EuclideanVector(start[1], start[2])

# Optimize the cost
-options = NLLSsolver.NLLSOptions(reldcost=1.e-6, iterator=iter, storetrajectory=true, storecosts=true)
-result = NLLSsolver.optimize!(problem, options)
+options = NLLSsolver.NLLSOptions(reldcost=1.e-6, iterator=iter)
+result = NLLSsolver.optimize!(problem, options, nothing, NLLSsolver.storecostscallback(costtrajectory))

# Construct the trajectory
-resize!(results[ind].trajectory.val, length(result.trajectory)+1)
+resize!(results[ind].trajectory.val, length(costtrajectory.trajectory)+1)
@inbounds results[ind].trajectory.val[1] = Point2f(start[1], start[2])
-for (i, step) in enumerate(result.trajectory)
+for (i, step) in enumerate(costtrajectory.trajectory)
@inbounds results[ind].trajectory.val[i+1] = results[ind].trajectory.val[i] + Point2f(step[1], step[2])
end

# Set the costs
-resize!(results[ind].costs.val, length(result.costs)+1)
+resize!(results[ind].costs.val, length(costtrajectory.costs)+1)
@inbounds results[ind].costs.val[1] = result.startcost
-@inbounds results[ind].costs.val[2:end] .= max.(result.costs, 1.e-38)
+@inbounds results[ind].costs.val[2:end] .= max.(costtrajectory.costs, 1.e-38)
+empty!(costtrajectory)
end
for res in results
notify(res.costs)
10 changes: 5 additions & 5 deletions src/BlockDenseMatrix.jl
@@ -11,11 +11,11 @@ end
uniformscaling!(bdm::BlockDenseMatrix, k) = uniformscaling!(bdm.data, k)

zero!(bdm::BlockDenseMatrix) = fill!(bdm.data, 0)
-block(bdm::BlockDenseMatrix, i, j, ::StaticInt{rows}, ::StaticInt{cols}) where {rows, cols} = SizedMatrix{rows, cols}(view(bdm.data, SR(0, rows-1).+bdm.rowblockoffsets[i], SR(0, cols-1).+bdm.columnblockoffsets[j]))
-block(bdm::BlockDenseMatrix, i, j, rows::Integer, cols::Integer) = view(bdm.data, SR(0, rows-1).+bdm.rowblockoffsets[i], SR(0, cols-1).+bdm.columnblockoffsets[j])
-block(bdm::BlockDenseMatrix, i, j, ::StaticInt{rows}, cols::Integer) where rows = HybridArray{Tuple{rows, StaticArrays.Dynamic()}}(block(bdm, i, j, rows, cols))
-block(bdm::BlockDenseMatrix, i, j) = block(bdm, i, j, bdm.rowblockoffsets[i+1]-bdm.rowblockoffsets[i], bdm.columnblockoffsets[j+1]-bdm.columnblockoffsets[j])
-validblock(::BlockDenseMatrix, ::Integer, ::Integer) = true
+@inline block(bdm::BlockDenseMatrix, i, j, ::StaticInt{rows}, ::StaticInt{cols}) where {rows, cols} = @inbounds SizedMatrix{rows, cols}(view(bdm.data, SR(0, rows-1).+bdm.rowblockoffsets[i], SR(0, cols-1).+bdm.columnblockoffsets[j]))
+@inline block(bdm::BlockDenseMatrix, i, j, rows::Integer, cols::Integer) = @inbounds view(bdm.data, SR(0, rows-1).+bdm.rowblockoffsets[i], SR(0, cols-1).+bdm.columnblockoffsets[j])
+@inline block(bdm::BlockDenseMatrix, i, j, ::StaticInt{rows}, cols::Integer) where rows = HybridArray{Tuple{rows, StaticArrays.Dynamic()}}(block(bdm, i, j, rows, cols))
+@inline block(bdm::BlockDenseMatrix, i, j) = @inbounds block(bdm, i, j, bdm.rowblockoffsets[i+1]-bdm.rowblockoffsets[i], bdm.columnblockoffsets[j+1]-bdm.columnblockoffsets[j])
+@inline validblock(::BlockDenseMatrix, ::Integer, ::Integer) = true
Base.size(bdm::BlockDenseMatrix) = size(bdm.data)
Base.size(bdm::BlockDenseMatrix, dim) = size(bdm.data, dim)
Base.length(bdm::BlockDenseMatrix) = length(bdm.data)
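For context, a BlockDenseMatrix is a single dense backing array plus row/column block offsets; block returns a view of the requested sub-block, statically sized when the dimensions are StaticInts. A toy sketch of that layout (illustrative names, not package API):

# A 4x4 backing array holding a 2x2 grid of 2x2 blocks; block i spans
# rows rowblockoffsets[i] to rowblockoffsets[i+1]-1, likewise for columns
data = zeros(4, 4)
rowblockoffsets = [1, 3, 5]
colblockoffsets = [1, 3, 5]
blockview(i, j) = view(data, rowblockoffsets[i]:rowblockoffsets[i+1]-1, colblockoffsets[j]:colblockoffsets[j+1]-1)
blockview(1, 2) .= 1.0   # mutates data in place through the view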
8 changes: 4 additions & 4 deletions src/BlockSparseMatrix.jl
@@ -99,11 +99,11 @@ function uniformscaling!(M::BlockSparseMatrix, k)
end

zero!(bsm::BlockSparseMatrix) = fill!(bsm.data, 0)
-block(bsm::BlockSparseMatrix, i, j, ::StaticInt{rows}, ::StaticInt{cols}) where {rows, cols} = SizedMatrix{rows, cols}(view(bsm.data, SR(0, rows*cols-1).+bsm.indicestransposed[j,i]))
-block(bsm::BlockSparseMatrix, i, j, rows::Integer, cols::Integer) = reshape(view(bsm.data, SR(0, rows*cols-1).+bsm.indicestransposed[j,i]), (rows, cols))
+block(bsm::BlockSparseMatrix, i, j, ::StaticInt{rows}, ::StaticInt{cols}) where {rows, cols} = @inbounds SizedMatrix{rows, cols}(view(bsm.data, SR(0, rows*cols-1).+bsm.indicestransposed[j,i]))
+block(bsm::BlockSparseMatrix, i, j, rows::Integer, cols::Integer) = @inbounds reshape(view(bsm.data, SR(0, rows*cols-1).+bsm.indicestransposed[j,i]), (rows, cols))
block(bsm::BlockSparseMatrix, i, j, ::StaticInt{rows}, cols::Integer) where rows = HybridArray{Tuple{rows, StaticArrays.Dynamic()}}(block(bsm, i, j, rows, cols))
-block(bsm::BlockSparseMatrix, i, j) = block(bsm, i, j, bsm.rowblocksizes[i], bsm.columnblocksizes[j])
-validblock(bsm::BlockSparseMatrix, i, j) = bsm.indicestransposed[j,i] != 0
+block(bsm::BlockSparseMatrix, i, j) = @inbounds block(bsm, i, j, bsm.rowblocksizes[i], bsm.columnblocksizes[j])
+validblock(bsm::BlockSparseMatrix, i, j) = @inbounds bsm.indicestransposed[j,i] != 0
Base.size(bsm::BlockSparseMatrix) = (bsm.m, bsm.n)
Base.size(bsm::BlockSparseMatrix, dim) = dim == 1 ? bsm.m : (dim == 2 ? bsm.n : 1)
Base.length(bsm::BlockSparseMatrix) = bsm.m * bsm.n
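A BlockSparseMatrix instead keeps one flat data vector: indicestransposed[j, i] holds the start index of block (i, j) in that vector, with 0 marking a structurally absent block, which is exactly what validblock tests. A toy sketch (illustrative names, not package API):

# Two 2x2 blocks stored back to back in one flat vector
data = zeros(8)
indicestransposed = [1 0; 0 5]   # only blocks (1,1) and (2,2) are present
hasblock(i, j) = indicestransposed[j, i] != 0
getblock(i, j, rows, cols) = reshape(view(data, indicestransposed[j, i] .+ (0:rows*cols-1)), rows, cols)
getblock(2, 2, 2, 2) .= 1.0      # writes data[5:8] without copying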
5 changes: 4 additions & 1 deletion src/NLLSsolver.jl
@@ -8,15 +8,17 @@ export ContaminatedGaussian # Adaptive robustifiers
export EuclideanVector, ZeroToInfScalar, ZeroToOneScalar # Concrete general variables
export Rotation3DR, Rotation3DL, Point3D, Pose3D, EffPose3D, UnitPose3D # Concrete 3D geometry variable types
export SimpleCamera, NoDistortionCamera, ExtendedUnifiedCamera, BarrelDistortion, EULensDistortion # Concrete camera sensor & lens variable types
+export CostTrajectory # Object for storing optimization trajectory and costs
# Functions
export addcost!, addvariable!, updatevarcostmap!, subproblem, subproblem!, nvars, nres # Construct a problem
export update, nvars # Variable interface
export nres, ndeps, varindices, getvars # Residual interface
export robustkernel, robustify, robustifydcost, robustifydkernel # Robustifier interface
export cost, computeresidual, computeresjac, computecost, computecostgradhess # Compute the objective
-export optimize!, printoutcallback # Optimize the objective
+export optimize! # Optimize the objective
export rodrigues, project, epipolarerror, proj2orthonormal # Multi-view geometry helper functions
export ideal2image, image2ideal, pixel2image, image2pixel, ideal2distorted, distorted2ideal, convertlens
+export nullcallback, printoutcallback, storecostscallback # Callbacks to be called once per optimization iteration

# Exported abstract types
abstract type AbstractCost end # Standard (non-squared) cost
@@ -57,6 +59,7 @@ include("robustadaptive.jl")
include("autodiff.jl")
include("cost.jl")
include("linearsolver.jl")
include("callbacks.jl")
include("iterators.jl")
include("optimize.jl")
end
22 changes: 21 additions & 1 deletion src/autodiff.jl
@@ -162,4 +162,24 @@ computegradhesshelper(varflags, residual, vars, x) = computecost(residual, updat

@inline autorobustifydcost(kernel::AbstractRobustifier, cost) = computehessian(Base.Fix1(robustify, kernel), cost)
@inline autorobustifydkernel(kernel::AbstractAdaptiveRobustifier, cost::T) where T = computehessian(fixallbutlast(computerobustgradhesshelper, kernel, cost), zeros(SVector{nvars(kernel)+1, T}))
-computerobustgradhesshelper(kernel, cost, x) = robustify(update(kernel, x), cost+x[end])
+computerobustgradhesshelper(kernel, cost, x) = robustify(update(kernel, x), cost+x[end])
+
+# Overloads of some specific functions
+struct RotMatLie{T}
+    x::T
+    y::T
+    z::T
+end
+
+function rodrigues(x::T, y::T, z::T) where T<:ForwardDiff.Dual
+    @assert x == 0 && y == 0 && z == 0
+    return RotMatLie{T}(x, y, z)
+end
+
+du(x) = ForwardDiff.partials(x)
+Base.:*(r::AbstractMatrix, u::RotMatLie{T}) where T = SMatrix{3, 3, T, 9}(T(r[1,1], du(u.z)*r[1,2]-du(u.y)*r[1,3]), T(r[2,1], du(u.z)*r[2,2]-du(u.y)*r[2,3]), T(r[3,1], du(u.z)*r[3,2]-du(u.y)*r[3,3]),
+                                                                          T(r[1,2], du(u.x)*r[1,3]-du(u.z)*r[1,1]), T(r[2,2], du(u.x)*r[2,3]-du(u.z)*r[2,1]), T(r[3,2], du(u.x)*r[3,3]-du(u.z)*r[3,1]),
+                                                                          T(r[1,3], du(u.y)*r[1,1]-du(u.x)*r[1,2]), T(r[2,3], du(u.y)*r[2,1]-du(u.x)*r[2,2]), T(r[3,3], du(u.y)*r[3,1]-du(u.x)*r[3,2]))
+Base.:*(u::RotMatLie{T}, r::AbstractMatrix) where T = SMatrix{3, 3, T, 9}(T(r[1,1], du(u.y)*r[3,1]-du(u.z)*r[2,1]), T(r[2,1], du(u.z)*r[1,1]-du(u.x)*r[3,1]), T(r[3,1], du(u.x)*r[2,1]-du(u.y)*r[1,1]),
+                                                                          T(r[1,2], du(u.y)*r[3,2]-du(u.z)*r[2,2]), T(r[2,2], du(u.z)*r[1,2]-du(u.x)*r[3,2]), T(r[3,2], du(u.x)*r[2,2]-du(u.y)*r[1,2]),
+                                                                          T(r[1,3], du(u.y)*r[3,3]-du(u.z)*r[2,3]), T(r[2,3], du(u.z)*r[1,3]-du(u.x)*r[3,3]), T(r[3,3], du(u.x)*r[2,3]-du(u.y)*r[1,3]))
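These additions special-case differentiating rodrigues at zero rotation: the @assert pins the evaluation point at w = (x, y, z) = 0, where the rotation matrix is the identity and its partial derivatives are the so(3) generators, so products with a constant matrix r can be assembled directly from r's entries without ever forming R. A standalone numerical sketch of the identity being exploited (helper names are illustrative, not package code):

using LinearAlgebra, StaticArrays

skew(x, y, z) = SMatrix{3, 3}(0.0, z, -y, -z, 0.0, x, y, -x, 0.0)
function rodriguesref(x, y, z)   # reference Rodrigues formula
    K = skew(x, y, z)
    theta = sqrt(x^2 + y^2 + z^2)
    return I + (sin(theta) / theta) * K + ((1 - cos(theta)) / theta^2) * (K * K)
end
r = @SMatrix rand(3, 3)
h = 1e-6
# At w = 0, the z-derivative of r * R is r * G_z with G_z = skew(0, 0, 1):
# column 1 of the product has partial r[:, 2], matching the du(u.z)*r[1,2]
# terms assembled element-wise in the overload above.
fd = (r * rodriguesref(0.0, 0.0, h) - r) / h
@assert isapprox(fd, r * skew(0.0, 0.0, 1.0); atol = 1e-4)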
54 changes: 54 additions & 0 deletions src/callbacks.jl
@@ -0,0 +1,54 @@
import Printf.@printf

# Default: do nothing
nullcallback(cost, unusedargs...) = (cost, 0)

# Print out per-iteration results
function printoutcallback(cost, problem, data, trailingargs...)
    if data.iternum == 1
        # First iteration, so print out column headers and the zeroth iteration (i.e. start) values
        println("iter cost cost_change |step|")
        @printf("% 4d % 8e % 4.3e % 3.2e\n", 0, data.bestcost, 0, 0)
    end
    @printf("% 4d % 8e % 4.3e % 3.2e\n", data.iternum, cost, data.bestcost-cost, norm(data.linsystem.x))
    return cost, 0
end
function printoutcallback(cost, data, trradius::Float64)
    if data.iternum == 1
        # First iteration, so print out column headers and the zeroth iteration (i.e. start) values
        println("iter cost cost_change |step| tr_radius")
        @printf("% 4d % 8e % 4.3e % 3.2e % 2.1e\n", 0, data.bestcost, 0, 0, trradius)
    end
    @printf("% 4d % 8e % 4.3e % 3.2e % 2.1e\n", data.iternum, cost, data.bestcost-cost, norm(data.linsystem.x), trradius)
    return cost, 0
end

# Store per-iteration costs
function storecostscallback(costs::Vector{Float64}, cost, unusedargs...)
    push!(costs, cost)
    return (cost, 0)
end

struct CostTrajectory
    costs::Vector{Float64}
    trajectory::Vector{Vector{Float64}}

    function CostTrajectory()
        return new(sizehint!(Vector{Float64}(), 50), sizehint!(Vector{Vector{Float64}}(), 50))
    end
end
function Base.empty!(ct::CostTrajectory)
    empty!(ct.costs)
    empty!(ct.trajectory)
    return ct
end

# Store per-iteration costs and trajectory
function storecostscallback(store::CostTrajectory, cost, problem, data, unusedargs...)
    push!(store.costs, cost)
    push!(store.trajectory, Vector{Float64}(data.linsystem.x))
    return (cost, 0)
end

# Callback function generator
storecostscallback(store) = (args...) -> storecostscallback(store, args...)
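As these definitions suggest, a callback receives the latest cost followed by the problem and the solver's per-iteration data, and returns a (cost, flag) tuple; the callbacks here all return a zero flag, presumably meaning "continue" (how optimize! interprets a nonzero flag is not shown in this diff). A minimal user-defined callback in the same shape:

# Sketch of a custom per-iteration callback (the flag semantics are assumed)
function mycallback(cost, problem, data, unusedargs...)
    println("iteration ", data.iternum, ": cost = ", cost)
    return (cost, 0)
end

# then: NLLSsolver.optimize!(problem, options, nothing, mycallback)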
19 changes: 10 additions & 9 deletions src/iterators.jl
@@ -24,9 +24,10 @@ end
# Dogleg optimization
mutable struct DoglegData
trustradius::Float64
+cauchy::Vector{Float64}

-function DoglegData()
-return new(0.0)
+function DoglegData(varlen)
+return new(0.0, Vector{Float64}(undef, varlen))
end
end

@@ -35,8 +36,8 @@ function iterate!(doglegdata::DoglegData, data, problem::NLLSProblem, options::N
data.timesolver += @elapsed_ns begin
# Compute the Cauchy step
gnorm2 = gradient' * gradient
-a = gnorm2 / ((gradient' * hessian) * gradient + floatmin(eltype(gradient)))
-cauchy = -a * gradient
+a = gnorm2 / (fast_bAb(hessian, gradient) + floatmin(eltype(gradient)))
+doglegdata.cauchy = -a * gradient
alpha2 = a * a * gnorm2
alpha = sqrt(alpha2)
if doglegdata.trustradius == 0
@@ -55,7 +56,7 @@ function iterate!(doglegdata::DoglegData, data, problem::NLLSProblem, options::N
# Determine the step
if !(alpha < doglegdata.trustradius)
# Along first leg
-data.linsystem.x .= (doglegdata.trustradius / alpha) * cauchy
+data.linsystem.x .= (doglegdata.trustradius / alpha) * doglegdata.cauchy
linear_approx = doglegdata.trustradius * (2 * alpha - doglegdata.trustradius) / (2 * a)
else
# Along second leg
@@ -65,9 +66,9 @@
else
# Find the point along the Cauchy -> Newton line on the trust
# region circumference
-data.linsystem.x .-= cauchy
+data.linsystem.x .-= doglegdata.cauchy
sq_leg = data.linsystem.x' * data.linsystem.x
-c = cauchy' * data.linsystem.x
+c = doglegdata.cauchy' * data.linsystem.x
trsq = doglegdata.trustradius * doglegdata.trustradius - alpha2
step = sqrt(c * c + sq_leg * trsq)
if c <= 0
Expand All @@ -76,7 +77,7 @@ function iterate!(doglegdata::DoglegData, data, problem::NLLSProblem, options::N
step = trsq / (c + step)
end
data.linsystem.x .*= step
-data.linsystem.x .+= cauchy
+data.linsystem.x .+= doglegdata.cauchy
linear_approx = 0.5 * (a * (1 - step) ^ 2 * gnorm2) + step * (2 - step) * cost_
end
end
@@ -132,7 +133,7 @@ function iterate!(levmardata::LevMarData, data, problem::NLLSProblem, options::N
if !(cost_ > data.bestcost) || (maximum(abs, data.linsystem.x) < options.dstep)
# Success (or convergence) - update lambda
uniformscaling!(hessian, -lastlambda)
-stepquality = (cost_ - data.bestcost) / (((data.linsystem.x' * hessian) * 0.5 + gradient') * data.linsystem.x)
+stepquality = (cost_ - data.bestcost) / (0.5 * fast_bAb(hessian, data.linsystem.x) + dot(gradient, data.linsystem.x))
levmardata.lambda *= stepquality < 0.983 ? 1 - (2 * stepquality - 1) ^ 3 : 0.1
# Return the cost
return cost_
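The two fast_bAb call sites above replace explicit quadratic forms, previously written as (b' * A) * b, with a single call. Its definition is not in the hunks shown here, but the LoopVectorization dependency added in Project.toml suggests a fused, allocation-free loop. A sketch of the assumed semantics (illustrative reimplementation, not the package's definition):

using LoopVectorization

# b' * A * b as a single fused loop: no intermediate vector is allocated
function fast_bAb_sketch(A::AbstractMatrix, b::AbstractVector)
    total = zero(eltype(b))
    @turbo for j in axes(A, 2), i in axes(A, 1)
        total += b[i] * A[i, j] * b[j]
    end
    return total
end

# e.g. fast_bAb_sketch(hessian, gradient) ≈ gradient' * hessian * gradient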
Diffs for the remaining 9 changed files are not shown here.

1 comment on commit 2fcf7b0

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/91797

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v3.2.0 -m "<description of version>" 2fcf7b0cab51e63c1c8968ee93b3abbb77bcb801
git push origin v3.2.0
