diff --git a/Project.toml b/Project.toml index 54c68a3c..421fab33 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NLLSsolver" uuid = "50b5cf9e-bf7a-4e29-8981-4280cbba70cb" authors = ["Oliver Woodford"] -version = "3.1.2" +version = "3.2.0" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" @@ -11,6 +11,7 @@ HybridArrays = "1baab800-613f-4b0a-84e4-9cd3431bfbb9" IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" LDLFactorizations = "40e66cde-538c-5869-a4ad-c39174c6795b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -23,6 +24,7 @@ ForwardDiff = "0.10" HybridArrays = "0.4" IfElse = "0.1.1" LDLFactorizations = "0.10" +LoopVectorization = "0.12.165" OrderedCollections = "1.6.2" Static = "0.8.7" StaticArrays = "1" diff --git a/examples/rosenbrock.jl b/examples/rosenbrock.jl index e9ce5c34..d21d16be 100644 --- a/examples/rosenbrock.jl +++ b/examples/rosenbrock.jl @@ -46,25 +46,27 @@ end OptimResult() = OptimResult(Observable(Vector{Float64}()), Observable(Vector{Point2f}())) function runoptimizers!(results, problem, start, iterators) + costtrajectory = NLLSsolver.CostTrajectory() for (ind, iter) in enumerate(iterators) # Set the start problem.variables[1] = NLLSsolver.EuclideanVector(start[1], start[2]) # Optimize the cost - options = NLLSsolver.NLLSOptions(reldcost=1.e-6, iterator=iter, storetrajectory=true, storecosts=true) - result = NLLSsolver.optimize!(problem, options) + options = NLLSsolver.NLLSOptions(reldcost=1.e-6, iterator=iter) + result = NLLSsolver.optimize!(problem, options, nothing, NLLSsolver.storecostscallback(costtrajectory)) # Construct the trajectory - resize!(results[ind].trajectory.val, length(result.trajectory)+1) + resize!(results[ind].trajectory.val, length(costtrajectory.trajectory)+1) @inbounds results[ind].trajectory.val[1] = Point2f(start[1], start[2]) - for (i, step) in enumerate(result.trajectory) + for (i, step) in enumerate(costtrajectory.trajectory) @inbounds results[ind].trajectory.val[i+1] = results[ind].trajectory.val[i] + Point2f(step[1], step[2]) end # Set the costs - resize!(results[ind].costs.val, length(result.costs)+1) + resize!(results[ind].costs.val, length(costtrajectory.costs)+1) @inbounds results[ind].costs.val[1] = result.startcost - @inbounds results[ind].costs.val[2:end] .= max.(result.costs, 1.e-38) + @inbounds results[ind].costs.val[2:end] .= max.(costtrajectory.costs, 1.e-38) + empty!(costtrajectory) end for res in results notify(res.costs) diff --git a/src/BlockDenseMatrix.jl b/src/BlockDenseMatrix.jl index d671e7f6..e9963a9d 100644 --- a/src/BlockDenseMatrix.jl +++ b/src/BlockDenseMatrix.jl @@ -11,11 +11,11 @@ end uniformscaling!(bdm::BlockDenseMatrix, k) = uniformscaling!(bdm.data, k) zero!(bdm::BlockDenseMatrix) = fill!(bdm.data, 0) -block(bdm::BlockDenseMatrix, i, j, ::StaticInt{rows}, ::StaticInt{cols}) where {rows, cols} = SizedMatrix{rows, cols}(view(bdm.data, SR(0, rows-1).+bdm.rowblockoffsets[i], SR(0, cols-1).+bdm.columnblockoffsets[j])) -block(bdm::BlockDenseMatrix, i, j, rows::Integer, cols::Integer) = view(bdm.data, SR(0, rows-1).+bdm.rowblockoffsets[i], SR(0, cols-1).+bdm.columnblockoffsets[j]) -block(bdm::BlockDenseMatrix, i, j, ::StaticInt{rows}, cols::Integer) where rows = HybridArray{Tuple{rows, StaticArrays.Dynamic()}}(block(bdm, i, j, rows, cols)) 
-block(bdm::BlockDenseMatrix, i, j) = block(bdm, i, j, bdm.rowblockoffsets[i+1]-bdm.rowblockoffsets[i], bdm.columnblockoffsets[j+1]-bdm.columnblockoffsets[j]) -validblock(::BlockDenseMatrix, ::Integer, ::Integer) = true +@inline block(bdm::BlockDenseMatrix, i, j, ::StaticInt{rows}, ::StaticInt{cols}) where {rows, cols} = @inbounds SizedMatrix{rows, cols}(view(bdm.data, SR(0, rows-1).+bdm.rowblockoffsets[i], SR(0, cols-1).+bdm.columnblockoffsets[j])) +@inline block(bdm::BlockDenseMatrix, i, j, rows::Integer, cols::Integer) = @inbounds view(bdm.data, SR(0, rows-1).+bdm.rowblockoffsets[i], SR(0, cols-1).+bdm.columnblockoffsets[j]) +@inline block(bdm::BlockDenseMatrix, i, j, ::StaticInt{rows}, cols::Integer) where rows = HybridArray{Tuple{rows, StaticArrays.Dynamic()}}(block(bdm, i, j, rows, cols)) +@inline block(bdm::BlockDenseMatrix, i, j) = @inbounds block(bdm, i, j, bdm.rowblockoffsets[i+1]-bdm.rowblockoffsets[i], bdm.columnblockoffsets[j+1]-bdm.columnblockoffsets[j]) +@inline validblock(::BlockDenseMatrix, ::Integer, ::Integer) = true Base.size(bdm::BlockDenseMatrix) = size(bdm.data) Base.size(bdm::BlockDenseMatrix, dim) = size(bdm.data, dim) Base.length(bdm::BlockDenseMatrix) = length(bdm.data) diff --git a/src/BlockSparseMatrix.jl b/src/BlockSparseMatrix.jl index f9ffb2ff..70c1ce70 100644 --- a/src/BlockSparseMatrix.jl +++ b/src/BlockSparseMatrix.jl @@ -99,11 +99,11 @@ function uniformscaling!(M::BlockSparseMatrix, k) end zero!(bsm::BlockSparseMatrix) = fill!(bsm.data, 0) -block(bsm::BlockSparseMatrix, i, j, ::StaticInt{rows}, ::StaticInt{cols}) where {rows, cols} = SizedMatrix{rows, cols}(view(bsm.data, SR(0, rows*cols-1).+bsm.indicestransposed[j,i])) -block(bsm::BlockSparseMatrix, i, j, rows::Integer, cols::Integer) = reshape(view(bsm.data, SR(0, rows*cols-1).+bsm.indicestransposed[j,i]), (rows, cols)) +block(bsm::BlockSparseMatrix, i, j, ::StaticInt{rows}, ::StaticInt{cols}) where {rows, cols} = @inbounds SizedMatrix{rows, cols}(view(bsm.data, SR(0, rows*cols-1).+bsm.indicestransposed[j,i])) +block(bsm::BlockSparseMatrix, i, j, rows::Integer, cols::Integer) = @inbounds reshape(view(bsm.data, SR(0, rows*cols-1).+bsm.indicestransposed[j,i]), (rows, cols)) block(bsm::BlockSparseMatrix, i, j, ::StaticInt{rows}, cols::Integer) where rows = HybridArray{Tuple{rows, StaticArrays.Dynamic()}}(block(bsm, i, j, rows, cols)) -block(bsm::BlockSparseMatrix, i, j) = block(bsm, i, j, bsm.rowblocksizes[i], bsm.columnblocksizes[j]) -validblock(bsm::BlockSparseMatrix, i, j) = bsm.indicestransposed[j,i] != 0 +block(bsm::BlockSparseMatrix, i, j) = @inbounds block(bsm, i, j, bsm.rowblocksizes[i], bsm.columnblocksizes[j]) +validblock(bsm::BlockSparseMatrix, i, j) = @inbounds bsm.indicestransposed[j,i] != 0 Base.size(bsm::BlockSparseMatrix) = (bsm.m, bsm.n) Base.size(bsm::BlockSparseMatrix, dim) = dim == 1 ? bsm.m : (dim == 2 ? 
bsm.n : 1) Base.length(bsm::BlockSparseMatrix) = bsm.m * bsm.n diff --git a/src/NLLSsolver.jl b/src/NLLSsolver.jl index 1bc2aef2..80ef63e9 100644 --- a/src/NLLSsolver.jl +++ b/src/NLLSsolver.jl @@ -8,15 +8,17 @@ export ContaminatedGaussian # Adaptive robustifiers export EuclideanVector, ZeroToInfScalar, ZeroToOneScalar # Concrete general variables export Rotation3DR, Rotation3DL, Point3D, Pose3D, EffPose3D, UnitPose3D # Concrete 3D geometry variable types export SimpleCamera, NoDistortionCamera, ExtendedUnifiedCamera, BarrelDistortion, EULensDistortion # Concrete camera sensor & lens variable types +export CostTrajectory # Object for storing opimization trajectory and costs # Functions export addcost!, addvariable!, updatevarcostmap!, subproblem, subproblem!, nvars, nres # Construct a problem export update, nvars # Variable interface export nres, ndeps, varindices, getvars # Residual interface export robustkernel, robustify, robustifydcost, robustifydkernel # Robustifier interface export cost, computeresidual, computeresjac, computecost, computecostgradhess # Compute the objective -export optimize!, printoutcallback # Optimize the objective +export optimize! # Optimize the objective export rodrigues, project, epipolarerror, proj2orthonormal # Multi-view geometry helper functions export ideal2image, image2ideal, pixel2image, image2pixel, ideal2distorted, distorted2ideal, convertlens +export nullcallback, printoutcallback, storecostscallback # Callbacks to be called once per optimization iteration # Exported abstract types abstract type AbstractCost end # Standard (non-squared) cost @@ -57,6 +59,7 @@ include("robustadaptive.jl") include("autodiff.jl") include("cost.jl") include("linearsolver.jl") +include("callbacks.jl") include("iterators.jl") include("optimize.jl") end diff --git a/src/autodiff.jl b/src/autodiff.jl index 180d3bc9..1d606fa5 100644 --- a/src/autodiff.jl +++ b/src/autodiff.jl @@ -162,4 +162,24 @@ computegradhesshelper(varflags, residual, vars, x) = computecost(residual, updat @inline autorobustifydcost(kernel::AbstractRobustifier, cost) = computehessian(Base.Fix1(robustify, kernel), cost) @inline autorobustifydkernel(kernel::AbstractAdaptiveRobustifier, cost::T) where T = computehessian(fixallbutlast(computerobustgradhesshelper, kernel, cost), zeros(SVector{nvars(kernel)+1, T})) -computerobustgradhesshelper(kernel, cost, x) = robustify(update(kernel, x), cost+x[end]) \ No newline at end of file +computerobustgradhesshelper(kernel, cost, x) = robustify(update(kernel, x), cost+x[end]) + +# Overloads of some specific functions +struct RotMatLie{T} + x::T + y::T + z::T +end + +function rodrigues(x::T, y::T, z::T) where T<:ForwardDiff.Dual + @assert x == 0 && y == 0 && z == 0 + return RotMatLie{T}(x, y, z) +end + +du(x) = ForwardDiff.partials(x) +Base.:*(r::AbstractMatrix, u::RotMatLie{T}) where T = SMatrix{3, 3, T, 9}(T(r[1,1], du(u.z)*r[1,2]-du(u.y)*r[1,3]), T(r[2,1], du(u.z)*r[2,2]-du(u.y)*r[2,3]), T(r[3,1], du(u.z)*r[3,2]-du(u.y)*r[3,3]), + T(r[1,2], du(u.x)*r[1,3]-du(u.z)*r[1,1]), T(r[2,2], du(u.x)*r[2,3]-du(u.z)*r[2,1]), T(r[3,2], du(u.x)*r[3,3]-du(u.z)*r[3,1]), + T(r[1,3], du(u.y)*r[1,1]-du(u.x)*r[1,2]), T(r[2,3], du(u.y)*r[2,1]-du(u.x)*r[2,2]), T(r[3,3], du(u.y)*r[3,1]-du(u.x)*r[3,2])) +Base.:*(u::RotMatLie{T}, r::AbstractMatrix) where T = SMatrix{3, 3, T, 9}(T(r[1,1], du(u.y)*r[3,1]-du(u.z)*r[2,1]), T(r[2,1], du(u.z)*r[1,1]-du(u.x)*r[3,1]), T(r[3,1], du(u.x)*r[2,1]-du(u.y)*r[1,1]), + T(r[1,2], du(u.y)*r[3,2]-du(u.z)*r[2,2]), T(r[2,2], du(u.z)*r[1,2]-du(u.x)*r[3,2]), 
T(r[3,2], du(u.x)*r[2,2]-du(u.y)*r[1,2]), + T(r[1,3], du(u.y)*r[3,3]-du(u.z)*r[2,3]), T(r[2,3], du(u.z)*r[1,3]-du(u.x)*r[3,3]), T(r[3,3], du(u.x)*r[2,3]-du(u.y)*r[1,3])) diff --git a/src/callbacks.jl b/src/callbacks.jl new file mode 100644 index 00000000..2f041e7f --- /dev/null +++ b/src/callbacks.jl @@ -0,0 +1,54 @@ +import Printf.@printf + +# Default: do nothing +nullcallback(cost, unusedargs...) = (cost, 0) + +# Print out per-iteration results +function printoutcallback(cost, problem, data, trailingargs...) + if data.iternum == 1 + # First iteration, so print out column headers and the zeroth iteration (i.e. start) values + println("iter cost cost_change |step|") + @printf("% 4d % 8e % 4.3e % 3.2e\n", 0, data.bestcost, 0, 0) + end + @printf("% 4d % 8e % 4.3e % 3.2e\n", data.iternum, cost, data.bestcost-cost, norm(data.linsystem.x)) + return cost, 0 +end +function printoutcallback(cost, data, trradius::Float64) + if data.iternum == 1 + # First iteration, so print out column headers and the zeroth iteration (i.e. start) values + println("iter cost cost_change |step| tr_radius") + @printf("% 4d % 8e % 4.3e % 3.2e % 2.1e\n", 0, data.bestcost, 0, 0, trradius) + end + @printf("% 4d % 8e % 4.3e % 3.2e % 2.1e\n", data.iternum, cost, data.bestcost-cost, norm(data.linsystem.x), trradius) + return cost, 0 +end + +# Store per-iteration costs +function storecostscallback(costs::Vector{Float64}, cost, unusedargs...) + push!(costs, cost) + return (cost, 0) +end + +struct CostTrajectory + costs::Vector{Float64} + trajectory::Vector{Vector{Float64}} + + function CostTrajectory() + return new(sizehint!(Vector{Float64}(), 50), sizehint!(Vector{Vector{Float64}}(), 50)) + end +end +function Base.empty!(ct::CostTrajectory) + empty!(ct.costs) + empty!(ct.trajectory) + return ct +end + +# Store per-iteration costs and trajectory +function storecostscallback(store::CostTrajectory, cost, problem, data, unusedargs...) + push!(store.costs, cost) + push!(store.trajectory, Vector{Float64}(data.linsystem.x)) + return (cost, 0) +end + +# Callback function generator +storecostscallback(store) = (args...) -> storecostscallback(store, args...) 
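For reference, a minimal usage sketch of the callback API added in src/callbacks.jl above (not part of the diff, and assuming a problem::NLLSProblem has already been constructed): per-iteration costs and update steps are collected in a CostTrajectory passed via storecostscallback, mirroring how the updated examples/rosenbrock.jl and tests call the new optimize! signature.

using NLLSsolver

# Collect per-iteration costs and update steps in a CostTrajectory store
ct = NLLSsolver.CostTrajectory()
# optimize!(problem, options, unfixed, callback); unfixed=nothing means all variables are free
result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(), nothing, NLLSsolver.storecostscallback(ct))
# ct.costs holds the cost after each iteration; ct.trajectory holds each update step
empty!(ct)  # reset the store before reusing it for another optimization run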
diff --git a/src/iterators.jl b/src/iterators.jl index 00646c32..414e9d68 100644 --- a/src/iterators.jl +++ b/src/iterators.jl @@ -24,9 +24,10 @@ end # Dogleg optimization mutable struct DoglegData trustradius::Float64 + cauchy::Vector{Float64} - function DoglegData() - return new(0.0) + function DoglegData(varlen) + return new(0.0, Vector{Float64}(undef, varlen)) end end @@ -35,8 +36,8 @@ function iterate!(doglegdata::DoglegData, data, problem::NLLSProblem, options::N data.timesolver += @elapsed_ns begin # Compute the Cauchy step gnorm2 = gradient' * gradient - a = gnorm2 / ((gradient' * hessian) * gradient + floatmin(eltype(gradient))) - cauchy = -a * gradient + a = gnorm2 / (fast_bAb(hessian, gradient) + floatmin(eltype(gradient))) + doglegdata.cauchy = -a * gradient alpha2 = a * a * gnorm2 alpha = sqrt(alpha2) if doglegdata.trustradius == 0 @@ -55,7 +56,7 @@ function iterate!(doglegdata::DoglegData, data, problem::NLLSProblem, options::N # Determine the step if !(alpha < doglegdata.trustradius) # Along first leg - data.linsystem.x .= (doglegdata.trustradius / alpha) * cauchy + data.linsystem.x .= (doglegdata.trustradius / alpha) * doglegdata.cauchy linear_approx = doglegdata.trustradius * (2 * alpha - doglegdata.trustradius) / (2 * a) else # Along second leg @@ -65,9 +66,9 @@ function iterate!(doglegdata::DoglegData, data, problem::NLLSProblem, options::N else # Find the point along the Cauchy -> Newton line on the trust # region circumference - data.linsystem.x .-= cauchy + data.linsystem.x .-= doglegdata.cauchy sq_leg = data.linsystem.x' * data.linsystem.x - c = cauchy' * data.linsystem.x + c = doglegdata.cauchy' * data.linsystem.x trsq = doglegdata.trustradius * doglegdata.trustradius - alpha2 step = sqrt(c * c + sq_leg * trsq) if c <= 0 @@ -76,7 +77,7 @@ function iterate!(doglegdata::DoglegData, data, problem::NLLSProblem, options::N step = trsq / (c + step) end data.linsystem.x .*= step - data.linsystem.x .+= cauchy + data.linsystem.x .+= doglegdata.cauchy linear_approx = 0.5 * (a * (1 - step) ^ 2 * gnorm2) + step * (2 - step) * cost_ end end @@ -132,7 +133,7 @@ function iterate!(levmardata::LevMarData, data, problem::NLLSProblem, options::N if !(cost_ > data.bestcost) || (maximum(abs, data.linsystem.x) < options.dstep) # Success (or convergence) - update lambda uniformscaling!(hessian, -lastlambda) - stepquality = (cost_ - data.bestcost) / (((data.linsystem.x' * hessian) * 0.5 + gradient') * data.linsystem.x) + stepquality = (cost_ - data.bestcost) / (0.5 * fast_bAb(hessian, data.linsystem.x) + dot(gradient, data.linsystem.x)) levmardata.lambda *= stepquality < 0.983 ? 
1 - (2 * stepquality - 1) ^ 3 : 0.1 # Return the cost return cost_ diff --git a/src/optimize.jl b/src/optimize.jl index 7dde5663..49102389 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -1,10 +1,9 @@ # Uni-variate optimization (single unfixed variable) -optimize!(problem::NLLSProblem, options::NLLSOptions, unfixed::Integer, starttimens=Base.time_ns())::NLLSResult = optimizeinternal!(problem, options, NLLSInternal(UInt(unfixed), nvars(problem.variables[unfixed])), starttimens) +optimize!(problem::NLLSProblem, options::NLLSOptions, unfixed::Integer, callback=nullcallback, starttimens=Base.time_ns())::NLLSResult = optimizeinternal_checkcallback!(problem, options, NLLSInternal(UInt(unfixed), nvars(problem.variables[unfixed])), callback, starttimens) # Multi-variate optimization -optimize!(problem::NLLSProblem, options::NLLSOptions, unfixed::Type) = optimize!(problem, options, typeof.(variables).==unfixed) -function optimize!(problem::NLLSProblem, options::NLLSOptions=NLLSOptions(), unfixed::AbstractVector=trues(length(problem.variables)))::NLLSResult +function optimize!(problem::NLLSProblem, options::NLLSOptions, unfixed::AbstractVector, callback)::NLLSResult starttime = Base.time_ns() @assert length(problem.variables) > 0 # Compute the number of free variables (nblocks) @@ -12,12 +11,20 @@ function optimize!(problem::NLLSProblem, options::NLLSOptions=NLLSOptions(), unf if nblocks == 1 # One unfixed variable unfixed = findfirst(unfixed) - return optimize!(problem, options, unfixed, starttime) + return optimize!(problem, options, unfixed, callback, starttime) end # Multiple variables. Use a block sparse matrix - return optimizeinternal!(problem, options, NLLSInternal(makesymmvls(problem, unfixed, nblocks)), starttime) + return optimizeinternal_checkcallback!(problem, options, NLLSInternal(makesymmvls(problem, unfixed, nblocks)), callback, starttime) end +# Conversions for different types of "unfixed" +convertunfixed(::Nothing, problem) = trues(length(problem.variables)) +convertunfixed(unfixed::Type, problem) = typeof.(problem.variables) .== unfixed +convertunfixed(unfixed, problem) = unfixed + +# Default options +optimize!(problem::NLLSProblem, options::NLLSOptions=NLLSOptions(), unfixed=nothing, callback=nullcallback) = optimize!(problem, options, convertunfixed(unfixed, problem), callback) + # Optimize one variable at a time function optimizesingles!(problem::NLLSProblem{VT, CT}, options::NLLSOptions, type::DataType)::NLLSResult where {VT, CT} starttime = Base.time_ns() @@ -60,7 +67,16 @@ function optimizesingles!(problem::NLLSProblem{VT, CT}, options::NLLSOptions, ty return NLLSResult(startcost, endcost, (Base.time_ns() - starttime)*1.e-9, timeinit + subprobinit*1.e-9, timecost, timegradient, timesolver, 0, iternum, costcomputations, gradientcomputations, linearsolvers, Vector{Float64}(), Vector{Vector{Float64}}()) end -function optimizeinternal!(problem::NLLSProblem, options::NLLSOptions, data::NLLSInternal, starttimens::UInt64)::NLLSResult +function optimizeinternal_checkcallback!(problem::NLLSProblem, options::NLLSOptions, data::NLLSInternal, callback, starttimens::UInt64)::NLLSResult + if isnothing(options.callback) + return optimizeinternal!(problem, options, data, callback, starttimens) + else + Base.depwarn("Setting callback in options is deprecated. 
Pass the callback directly to optimize!() instead", :NLLSOptions) + return optimizeinternal!(problem, options, data, options.callback, starttimens) + end +end + +function optimizeinternal!(problem::NLLSProblem, options::NLLSOptions, data::NLLSInternal, callback, starttimens::UInt64)::NLLSResult # Copy the variables if length(problem.variables) != length(problem.varnext) problem.varnext = copy(problem.variables) @@ -69,27 +85,27 @@ function optimizeinternal!(problem::NLLSProblem, options::NLLSOptions, data::NLL if options.iterator == newton || options.iterator == gaussnewton # Newton's method, using Gauss' approximation to the Hessian (optimizing Hessian form) newtondata = NewtonData() - return optimizeinternal!(problem, options, data, newtondata, starttimens) + return optimizeinternal!(problem, options, data, newtondata, callback, starttimens) end if options.iterator == levenbergmarquardt # Levenberg-Marquardt levmardata = LevMarData() - return optimizeinternal!(problem, options, data, levmardata, starttimens) + return optimizeinternal!(problem, options, data, levmardata, callback, starttimens) end if options.iterator == dogleg # Dogleg - doglegdata = DoglegData() - return optimizeinternal!(problem, options, data, doglegdata, starttimens) + doglegdata = DoglegData(length(data.linsystem.x)) + return optimizeinternal!(problem, options, data, doglegdata, callback, starttimens) end if options.iterator == gradientdescent # Gradient descent gddata = GradientDescentData(1.0) - return optimizeinternal!(problem, options, data, gddata, starttimens) + return optimizeinternal!(problem, options, data, gddata, callback, starttimens) end error("Iterator not recognized") end -function optimizeinternal!(problem::NLLSProblem, options::NLLSOptions, data, iteratedata, starttime::UInt64)::NLLSResult +function optimizeinternal!(problem::NLLSProblem, options::NLLSOptions, data, iteratedata, callback, starttime::UInt64)::NLLSResult timeinit = Base.time_ns() - starttime # Initialize the linear problem data.timegradient += @elapsed_ns data.bestcost = costgradhess!(data.linsystem, problem.variables, problem.costs) @@ -107,7 +123,7 @@ function optimizeinternal!(problem::NLLSProblem, options::NLLSOptions, data, ite # Call the per iteration solver cost = iterate!(iteratedata, data, problem, options)::Float64 # Call the user-defined callback - cost, terminate = options.callback(cost, problem, data, iteratedata)::Tuple{Float64, Int} + cost, terminate = callback(cost, problem, data, iteratedata)::Tuple{Float64, Int} # Store the cost if necessary if options.storecosts push!(costs, cost) diff --git a/src/structs.jl b/src/structs.jl index 139a1527..9fb51152 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -30,31 +30,14 @@ struct NLLSOptions storecosts::Bool # Indicates whether the cost per outer iteration should be stored storetrajectory::Bool # Indicates whether the step per outer iteration should be stored end -function NLLSOptions(; maxiters=100, reldcost=1.e-15, absdcost=1.e-15, dstep=1.e-15, maxfails=3, iterator=levenbergmarquardt, callback=(cost, args...)->(cost, 0), storecosts=false, storetrajectory=false) +function NLLSOptions(; maxiters=100, reldcost=1.e-15, absdcost=1.e-15, dstep=1.e-15, maxfails=3, iterator=levenbergmarquardt, callback=nothing, storecosts=false, storetrajectory=false) if iterator == gaussnewton Base.depwarn("gaussnewton is deprecated. 
Use newton instead", :NLLSOptions) end - NLLSOptions(reldcost, absdcost, dstep, maxfails, maxiters, iterator, callback, storecosts, storetrajectory) -end - -# Utility callback that prints out per-iteration results -function printoutcallback(cost, problem, data, trailingargs...) - if data.iternum == 1 - # First iteration, so print out column headers and the zeroth iteration (i.e. start) values - println("iter cost cost_change |step|") - @printf("% 4d % 8e % 4.3e % 3.2e\n", 0, data.bestcost, 0, 0) + if storecosts || storetrajectory + Base.depwarn("storecosts and storetrajectory are deprecated. Use storecostscallback instead", :NLLSOptions) end - @printf("% 4d % 8e % 4.3e % 3.2e\n", data.iternum, cost, data.bestcost-cost, norm(data.linsystem.x)) - return cost, 0 -end -function printoutcallback(cost, data, trradius::Float64) - if data.iternum == 1 - # First iteration, so print out column headers and the zeroth iteration (i.e. start) values - println("iter cost cost_change |step| tr_radius") - @printf("% 4d % 8e % 4.3e % 3.2e % 2.1e\n", 0, data.bestcost, 0, 0, trradius) - end - @printf("% 4d % 8e % 4.3e % 3.2e % 2.1e\n", data.iternum, cost, data.bestcost-cost, norm(data.linsystem.x), trradius) - return cost, 0 + NLLSOptions(reldcost, absdcost, dstep, maxfails, maxiters, iterator, callback, storecosts, storetrajectory) end struct NLLSResult diff --git a/src/utils.jl b/src/utils.jl index f4d113b1..24bb96d3 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,4 +1,4 @@ -using StaticArrays, Static +using StaticArrays, Static, SparseArrays, LoopVectorization @inline function valuedispatch(lower::StaticInt, upper::StaticInt, val::Int, fun) if lower >= upper @@ -55,8 +55,46 @@ end function Base.cumsum!(A::AbstractVector) total = zero(eltype(A)) @inbounds for ind in eachindex(A) - total += A[ind] + @fastmath total += A[ind] A[ind] = total end return A end + +function fast_bAb(A::Matrix, b::Vector) + total = zero(eltype(b)) + @tturbo for i in eachindex(b) + subtotal = zero(eltype(b)) + for j in eachindex(b) + subtotal += A[j,i] * b[j] + end + total += b[i] * subtotal + end + return total +end + +function fast_bAb(A::StaticArray, b::StaticArray) + total = zero(eltype(b)) + @turbo for i in eachindex(b) + subtotal = zero(eltype(b)) + for j in eachindex(b) + subtotal += A[j,i] * b[j] + end + total += b[i] * subtotal + end + return total +end + +function fast_bAb(A::SparseMatrixCSC, b::Vector) + total = zero(eltype(b)) + for i in eachindex(b) + coltotal = zero(eltype(b)) + for j in nzrange(A, i) + @inbounds @fastmath coltotal += A.nzval[j] * b[A.rowval[j]] + end + @inbounds @fastmath coltotal *= b[i] + total += coltotal + end + return total +end + diff --git a/src/visualgeometry/geometry.jl b/src/visualgeometry/geometry.jl index c7acf34c..65d9d48d 100644 --- a/src/visualgeometry/geometry.jl +++ b/src/visualgeometry/geometry.jl @@ -1,10 +1,6 @@ using StaticArrays, LinearAlgebra function rodrigues(x::T, y::T, z::T) where T<:Number - if x == 0 && y == 0 && z == 0 - # Short cut for derivatives at identity - return SMatrix{3, 3, T, 9}(T(1), z, -y, -z, T(1), x, y, -x, T(1)) - end theta2 = x * x + y * y + z * z cosf = T(0.5) sinc = T(1) @@ -62,7 +58,7 @@ end Rotation3DR(x, y, z) = Rotation3DR(rodrigues(x, y, z)) Rotation3DR() = Rotation3DR(SMatrix{3, 3, Float64}(1., 0., 0., 0., 1., 0., 0., 0., 1.)) nvars(::Rotation3DR) = static(3) -update(var::Rotation3DR, updatevec, start=1) = var * Rotation3DR(updatevec[start], updatevec[start+1], updatevec[start+2]) +update(var::Rotation3DR, updatevec, start=1) = Rotation3DR(var.m 
* rodrigues(updatevec[start], updatevec[start+1], updatevec[start+2])) struct Rotation3DL{T<:Real} <: AbstractRotation3D m::SMatrix{3, 3, T, 9} @@ -70,8 +66,7 @@ end Rotation3DL(x, y, z) = Rotation3DL(rodrigues(x, y, z)) Rotation3DL() = Rotation3DL(SMatrix{3, 3, Float64}(1., 0., 0., 0., 1., 0., 0., 0., 1.)) nvars(::Rotation3DL) = static(3) -update(var::Rotation3DL, updatevec, start=1) = Rotation3DL(updatevec[start], updatevec[start+1], updatevec[start+2]) * var - +update(var::Rotation3DL, updatevec, start=1) = Rotation3DL(rodrigues(updatevec[start], updatevec[start+1], updatevec[start+2]) * var.m) struct UnitVec3D{T<:Real} <: AbstractPoint3D v::Rotation3DR{T} diff --git a/test/adaptivecost.jl b/test/adaptivecost.jl index 072407a7..65805790 100644 --- a/test/adaptivecost.jl +++ b/test/adaptivecost.jl @@ -51,7 +51,7 @@ end problem.variables[3] = 0. # Optimize the cost by alternating EM & optimizing the mean - result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.newton, callback=emcallback), SVector(1, 2, 3) .> 1) + result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.newton), SVector(1, 2, 3) .> 1, emcallback) # Check the result @test isapprox(NLLSsolver.params(problem.variables[1]), SVector(1.0, 10.0, 0.8); rtol=0.1) diff --git a/test/dynamicvars.jl b/test/dynamicvars.jl index 5e5f9f32..00a47d64 100644 --- a/test/dynamicvars.jl +++ b/test/dynamicvars.jl @@ -33,7 +33,7 @@ Base.eltype(::NormResidual) = Float64 NLLSsolver.addcost!(problem, NormResidual(length(X))) # Optimize - result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.newton, storecosts=true)) + result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.newton)) # Check the result is collinear to X Y = problem.variables[1] diff --git a/test/functional.jl b/test/functional.jl index bded1328..78b4c9a8 100644 --- a/test/functional.jl +++ b/test/functional.jl @@ -48,29 +48,38 @@ Base.eltype(::RosenbrockB) = Float64 @test NLLSsolver.cost(subprob) == 0. 
# Check callback termination - result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(callback=(cost, unusedargs...)->(cost, 13))) + result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(), nothing, (cost, unusedargs...)->(cost, 13)) @test result.termination == (13 << 9) @test result.niterations == 1 - # Test optimization - for iter in [NLLSsolver.newton, NLLSsolver.levenbergmarquardt, NLLSsolver.dogleg] - # Set the start - problem.variables[1] = -0.5 - problem.variables[2] = 2.5 + # Optimize using Newton + result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.newton)) + @test isapprox(problem.variables[1], 1.0; rtol=1.e-10) + @test isapprox(problem.variables[2], 1.0; rtol=1.e-10) - # Optimize the cost - result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=iter)) + # Optimize using Levenberg-Marquardt + problem.variables[1] = -0.5 + problem.variables[2] = 2.5 + ct = NLLSsolver.CostTrajectory() + result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.levenbergmarquardt), nothing, NLLSsolver.storecostscallback(ct)) + @test isapprox(problem.variables[1], 1.0; rtol=1.e-10) + @test isapprox(problem.variables[2], 1.0; rtol=1.e-10) + @test all(diff(ct.costs) .<= 0.0) # Check costs decrease - # Check the result - @test isapprox(problem.variables[1], 1.0; rtol=1.e-10) - @test isapprox(problem.variables[2], 1.0; rtol=1.e-10) - end + # Optimize using dogleg + problem.variables[1] = -0.5 + problem.variables[2] = 2.5 + empty!(ct) + result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.dogleg), nothing, NLLSsolver.storecostscallback(ct.costs)) + @test isapprox(problem.variables[1], 1.0; rtol=1.e-10) + @test isapprox(problem.variables[2], 1.0; rtol=1.e-10) + @test all(diff(ct.costs) .<= 0.0) # Check costs decrease # Test standard gradient descent (a worse optimizer, so needs a closer starting point) problem.variables[1] = 1.0 - 1.e-5 problem.variables[2] = 1.0 display(problem) - result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.gradientdescent, callback=NLLSsolver.printoutcallback)) + result = NLLSsolver.optimize!(problem, NLLSsolver.NLLSOptions(iterator=NLLSsolver.gradientdescent), nothing, NLLSsolver.printoutcallback) display(result) @test isapprox(problem.variables[1], 1.0; rtol=1.e-5) @test isapprox(problem.variables[2], 1.0; rtol=1.e-5) diff --git a/test/geometry.jl b/test/geometry.jl index 495620ad..a43213ca 100644 --- a/test/geometry.jl +++ b/test/geometry.jl @@ -1,4 +1,6 @@ -using NLLSsolver, LinearAlgebra, Test +using NLLSsolver, LinearAlgebra, StaticArrays, Test + +checkduals(x, y) = NLLSsolver.extractvaldual(vec(x)) == NLLSsolver.extractvaldual(vec(y)) @testset "geometry.jl" begin # Test utility functions @@ -22,6 +24,15 @@ using NLLSsolver, LinearAlgebra, Test @test isapprox(NLLSsolver.getvec((rotl * rotr) * point), NLLSsolver.getvec(point)) @test isapprox(NLLSsolver.getvec((NLLSsolver.inverse(rotl) * NLLSsolver.inverse(rotr)) * point), NLLSsolver.getvec(point)) + # Test rotation autodiff updates + updatevec = NLLSsolver.dualzeros(Float64, Val(4)) + T = eltype(updatevec) + updatemat = SMatrix{3, 3, T, 9}(T(1), updatevec[3], -updatevec[2], -updatevec[3], T(1), updatevec[1], updatevec[2], -updatevec[1], T(1)) + rotr = NLLSsolver.Rotation3DR(randn(), randn(), randn()) + rotl = NLLSsolver.Rotation3DL(randn(), randn(), randn()) + @test checkduals(NLLSsolver.update(rotr, updatevec).m, rotr.m * updatemat) + @test checkduals(NLLSsolver.update(rotl, 
updatevec).m, updatemat * rotl.m) + # Test poses updatevec = zeros(6) pose = NLLSsolver.update(NLLSsolver.Pose3D(rotr, point), updatevec) diff --git a/test/utils.jl b/test/utils.jl index 6285b7ea..ae90f82b 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,10 +1,17 @@ -using NLLSsolver, Test +using NLLSsolver, Test, SparseArrays, StaticArrays, LinearAlgebra + +test_fast_bAb(A, b) = @test NLLSsolver.fast_bAb(A, b) ≈ dot(b, A, b) @testset "utils.jl" begin @test NLLSsolver.runlengthencodesortedints([0, 0, 1, 1, 3]) == [1, 3, 5, 5, 6] @test NLLSsolver.runlengthencodesortedints([3]) == [1, 1, 1, 1, 2] @test NLLSsolver.runlengthencodesortedints([0]) == [1, 2] + x = [1, 2, 3] @test cumsum!(x) == [1, 3, 6] @test x == [1, 3, 6] + + test_fast_bAb(randn(20, 20), randn(20)) + test_fast_bAb(sprandn(100, 100, 0.02), randn(100)) + test_fast_bAb(randn(SMatrix{10, 10, Float64, 100}), randn(SVector{10, Float64})) end
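As a closing note, a small sketch (not part of the diff) of what the new fast_bAb helper in src/utils.jl computes: the quadratic form b' * A * b, which the tests above verify against LinearAlgebra.dot for dense, sparse and static matrices.

using NLLSsolver, LinearAlgebra

A = randn(6, 6)
b = randn(6)
# fast_bAb(A, b) is a LoopVectorization-accelerated b' * A * b
@assert NLLSsolver.fast_bAb(A, b) ≈ dot(b, A, b)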