diff --git a/docs/src/versions.md b/docs/src/versions.md index 94e9f41f6..8251b6b96 100644 --- a/docs/src/versions.md +++ b/docs/src/versions.md @@ -4,6 +4,7 @@ (In development) * Better error message when a $g$-tensor is symmetry disallowed. +* Higher-precision convergence in [`minimize_energy!`](@ref). * Fix [`minimize_energy!`](@ref) when used with [`set_vacancy_at!`](@ref). ## v0.7.3 diff --git a/src/Optimization.jl b/src/Optimization.jl index 71b8200d0..1b98d7482 100644 --- a/src/Optimization.jl +++ b/src/Optimization.jl @@ -93,15 +93,16 @@ end """ minimize_energy!(sys::System{N}; maxiters=1000, method=Optim.ConjugateGradient(), - g_tol=1e-10, kwargs...) where N + kwargs...) where N Optimizes the spin configuration in `sys` to minimize energy. A total of -`maxiters` iterations will be attempted. Convergence is reached when the root -mean squared energy gradient goes below `g_tol`. The remaining `kwargs` will be -forwarded to the `optimize` method of the Optim.jl package. +`maxiters` iterations will be attempted. The `method` parameter will be used in +the `optimize` function of the [Optim.jl +package](https://github.com/JuliaNLSolvers/Optim.jl). Any remaining `kwargs` +will be included in the `Options` constructor of Optim.jl. """ function minimize_energy!(sys::System{N}; maxiters=1000, subiters=10, method=Optim.ConjugateGradient(), - g_tol=1e-10, kwargs...) where N + kwargs...) where N # Allocate buffers for optimization: # - Each `ns[site]` defines a direction for stereographic projection. # - Each `αs[:,site]` will be optimized in the space orthogonal to `ns[site]`. @@ -123,18 +124,19 @@ function minimize_energy!(sys::System{N}; maxiters=1000, subiters=10, method=Opt optim_set_gradient!(G, sys, αs, ns) end - # Monitor energy gradient magnitude relative to absolute tolerance `g_tol`. - g_res = Inf - - # Repeatedly optimize using a small number (`subiters`) of steps. - options = Optim.Options(; iterations=subiters, g_tol, kwargs...) + # Repeatedly optimize using a small number (`subiters`) of steps. See + # https://github.com/JuliaNLSolvers/Optim.jl/issues/1120 for discussion of + # settings required to find the minimizer `x` to high-accuracy. Note that + # `x` contains normalized spin variables, while gradient `g` has dimensions + # of energy, so we elect to set `x_tol` as the dimensionless convergence + # tolerance. + options = Optim.Options(; iterations=subiters, x_tol=1e-12, g_tol=0, f_reltol=NaN, f_abstol=NaN, kwargs...) + local output for iter in 1 : div(maxiters, subiters, RoundUp) output = Optim.optimize(f, g!, αs, method, options) - g_res = Optim.g_residual(output) - @assert g_tol == Optim.g_tol(output) # Exit if converged - if g_res ≤ g_tol + if Optim.converged(output) cnt = (iter-1)*subiters + output.iterations return cnt end @@ -144,8 +146,7 @@ function minimize_energy!(sys::System{N}; maxiters=1000, subiters=10, method=Opt αs .*= 0 end - # res_str = number_to_simple_string(g_res; digits=2) - # tol_str = number_to_simple_string(g_tol; digits=2) - # @warn "Optimization failed to converge within $maxiters iterations ($res_str ≰ $tol_str)" + f_abschange, x_abschange, g_residual = number_to_simple_string.((Optim.f_abschange(output), Optim.x_abschange(output), Optim.g_residual(output)); digits=2) + @warn "Non-converged after $maxiters iterations: |ΔE|=$f_abschange, |Δx|=$x_abschange, |∂E/∂x|=$g_residual" return -1 end diff --git a/src/Spiral/SpiralEnergy.jl b/src/Spiral/SpiralEnergy.jl index 37e76a883..a5a9e2943 100644 --- a/src/Spiral/SpiralEnergy.jl +++ b/src/Spiral/SpiralEnergy.jl @@ -207,7 +207,7 @@ function minimize_spiral_energy!(sys, axis; maxiters=10_000, k_guess=randn(sys.r check_rotational_symmetry(sys; axis, θ=0.01) L = natoms(sys.crystal) - + params = fill(zero(Vec3), L+1) for i in 1:L params[i] = inverse_stereographic_projection(normalize(sys.dipoles[i]), axis) @@ -218,8 +218,9 @@ function minimize_spiral_energy!(sys, axis; maxiters=10_000, k_guess=randn(sys.r f(params) = spiral_f(sys, axis, params, λ) g!(G, params) = spiral_g!(G, sys, axis, params, λ) - # Minimize f, the energy of a spiral - options = Optim.Options(; iterations=maxiters) + # Minimize f, the energy of a spiral. See comment in `minimize_energy!` for + # a discussion of the tolerance settings. + options = Optim.Options(; iterations=maxiters, x_tol=1e-12, g_tol=0, f_reltol=NaN, f_abstol=NaN) # LBFGS does not converge to high precision, but ConjugateGradient can fail # to converge: https://github.com/JuliaNLSolvers/LineSearches.jl/issues/175. diff --git a/test/test_lswt.jl b/test/test_lswt.jl index 7fa62a3b3..c8fdb98ee 100644 --- a/test/test_lswt.jl +++ b/test/test_lswt.jl @@ -37,7 +37,7 @@ # minimize_energy!(sys; maxiters=1000) # println(round(reinterpret(reshape, ComplexF64, sys.coherents); digits=3)) ground_state = ComplexF64[0.452 + 0.069im; 0.586 + 0.166im; 0.31 + 0.351im; 0.035 + 0.39im; -0.136 + 0.145im; -0.03 - 0.076im;;;;; -0.026 - 0.064im; 0.247 - 0.209im; -0.444 + 0.307im; 0.501 - 0.122im; -0.478 - 0.012im; 0.32 - 0.046im;;;;; -0.078 - 0.451im; -0.029 - 0.609im; 0.234 - 0.406im; 0.358 - 0.158im; 0.181 + 0.083im; -0.063 + 0.053im;;;;; -0.061 + 0.033im; -0.235 - 0.222im; 0.354 + 0.407im; -0.177 - 0.485im; 0.042 + 0.476im; -0.082 - 0.312im;;;;; -0.782 + 0.332im; 0.184 - 0.453im; 0.058 + 0.138im; 0.121 + 0.049im; -0.007 + 0.003im; -0.006 + 0.015im;;;;; 0.057 - 0.005im; -0.023 - 0.013im; -0.101 + 0.009im; -0.247 - 0.257im; -0.631 - 0.193im; -0.617 + 0.209im;;;;; -0.695 - 0.489im; 0.482 - 0.083im; -0.087 + 0.122im; 0.022 + 0.129im; -0.006 - 0.004im; -0.016 + 0.003im;;;;; 0.04 + 0.041im; -0.004 - 0.026im; -0.07 - 0.072im; 0.044 - 0.354im; -0.248 - 0.611im; -0.551 - 0.347im;;;;; 0.347 + 0.149im; -0.216 + 0.511im; -0.386 - 0.36im; 0.434 - 0.153im; -0.024 + 0.24im; 0.026 - 0.021im;;;;; 0.061 + 0.082im; 0.096 + 0.219im; 0.361 + 0.203im; 0.487 - 0.18im; 0.228 - 0.554im; -0.282 - 0.229im;;;;; -0.205 + 0.317im; -0.468 - 0.298im; 0.419 - 0.32im; 0.078 + 0.454im; -0.233 - 0.064im; 0.016 + 0.029im;;;;; 0.101 + 0.018im; 0.22 + 0.095im; 0.402 - 0.097im; 0.233 - 0.464im; -0.211 - 0.561im; -0.363 + 0.025im;;;;; 0.059 - 0.582im; 0.442 - 0.355im; 0.51 + 0.063im; 0.158 + 0.213im; -0.009 + 0.024im; 0.03 - 0.026im;;;;; 0.582 - 0.058im; 0.436 + 0.363im; 0.04 + 0.512im; -0.177 + 0.197im; -0.025 - 0.004im; 0.031 + 0.024im;;;;; 0.561 + 0.165im; 0.268 + 0.5im; -0.156 + 0.49im; -0.238 + 0.116im; -0.022 - 0.013im; 0.02 + 0.034im;;;;; -0.337 + 0.478im; -0.56 + 0.092im; -0.413 - 0.306im; -0.033 - 0.263im; 0.019 - 0.017im; -0.039 + 0.008im] - + sys.coherents .= reinterpret(reshape, Sunny.CVec{6}, ground_state) minimize_energy!(sys) @assert energy_per_site(sys) ≈ -328.38255 @@ -65,7 +65,7 @@ data_flat = reinterpret(ComplexF64, res.data[1:5]) # println(round.(data_flat; digits=12)) data_golden = natoms * ComplexF64[0.000768755803 + 0.0im, 0.000453313199 - 4.8935387e-5im, 0.000468535469 + 8.5812793e-5im, 0.000453313199 + 4.8935387e-5im, 0.00027042076 + 0.0im, 0.000270819458 + 8.0426107e-5im, 0.000468535469 - 8.5812793e-5im, 0.000270819458 - 8.0426107e-5im, 0.000295138353 + 0.0im, 0.0 + 0.0im, 0.0 - 0.0im, 0.0 + 0.0im, 0.0 + 0.0im, 0.0 + 0.0im, 0.0 + 0.0im, 0.0 - 0.0im, 0.0 - 0.0im, 0.0 + 0.0im, 0.00021794048 + 0.0im, -0.000114399503 - 0.000211293782im, -0.000126018935 + 0.000199895684im, -0.000114399503 + 0.000211293782im, 0.000264899429 + 0.0im, -0.000127650501 - 0.000227103219im, -0.000126018935 - 0.000199895684im, -0.000127650501 + 0.000227103219im, 0.000256212415 + 0.0im, 0.0 + 0.0im, -0.0 - 0.0im, -0.0 + 0.0im, -0.0 + 0.0im, 0.0 + 0.0im, -0.0 - 0.0im, -0.0 - 0.0im, -0.0 + 0.0im, 0.0 + 0.0im, 7.5017149e-5 + 0.0im, 0.000206625046 + 0.000158601356im, 0.000240055385 - 0.00011016714im, 0.000206625046 - 0.000158601356im, 0.000904437194 + 0.0im, 0.000428286033 - 0.000810966567im, 0.000240055385 + 0.00011016714im, 0.000428286033 + 0.000810966567im, 0.000929965845 + 0.0im] - + @test isapprox(data_flat, data_golden; atol=1e-9) end @@ -160,7 +160,7 @@ end latvecs = lattice_vectors(a, a, a, 90, 90, 90) positions = [[0, 0, 0]] cryst = Crystal(latvecs, positions) - + function test_biquad(mode, q, s) # System sys = System(cryst, [1 => Moment(; s, g=2)], mode; dims=(2, 2, 2)) @@ -181,7 +181,7 @@ end # Analytical result γq = 2 * (cos(2π*q[1]) + cos(2π*q[2]) + cos(2π*q[3])) disp_ref = J * (s*cos(α) - (2*s-2+1/s) * sin(α)) * √(36 - γq^2) - + @test disp[end-1] ≈ disp[end] ≈ disp_ref end @@ -197,16 +197,16 @@ end latvecs = lattice_vectors(1, 1, 1, 90, 90, 90) cryst = Crystal(latvecs, [[0,0,0], [0.4,0,0]]; types=["A", "B"]) - + sys = System(cryst, [1 => Moment(s=1, g=2), 2 => Moment(s=2, g=2)], :dipole) set_pair_coupling!(sys, (S1, S2) -> +(S1'*diagm([2,-1,-1])*S1)*(S2'*diagm([2,-1,-1])*S2), Bond(1, 2, [0,0,0])) - + θ = randn() set_dipole!(sys, [1, 0, 0], (1, 1, 1, 1)) set_dipole!(sys, [0, cos(θ), sin(θ)], (1, 1, 1, 2)) energy(sys) @test energy(sys) ≈ -3 - + swt = SpinWaveTheory(sys; measure=ssf_trace(sys; apply_g=false)) res = intensities_bands(swt, [[0,0,0]]) @test res.disp[1] ≈ 9 @@ -223,7 +223,7 @@ end positions = [[0, 0, 0]] cryst = Crystal(latvecs, positions) q = [0.12, 0.23, 0.34] - + sys = System(cryst, [1 => Moment(; s, g=-1)], :dipole) sys = reshape_supercell(sys, [1 -1 0; 1 1 0; 0 0 1]) set_exchange!(sys, J, Bond(1, 1, [1, 0, 0])) @@ -304,7 +304,7 @@ end polarize_spins!(sys, (0,0,1)) @test energy_per_site(sys) ≈ -0.1913132980155851 - + swt = SpinWaveTheory(sys; measure=ssf_perp(sys)) qs = [[0, 0, 0], [0, 0, 1/2], [0, 1/2, 1/2], [0, 0, 0]] res = intensities_bands(swt, qs) @@ -318,19 +318,19 @@ end sys = System(cryst, [1 => Moment(s=1, g=2)], :dipole, seed=2) enable_dipole_dipole!(sys, Units(:meV, :angstrom).vacuum_permeability) polarize_spins!(sys, (1,2,3)) # arbitrary direction - + R = hcat([1,1,-1], [-1,1,1], [1,-1,1]) / 2 sys_reshape = reshape_supercell(sys, R) @test energy_per_site(sys_reshape) ≈ energy_per_site(sys) ≈ -0.89944235377 - + swt1 = SpinWaveTheory(sys; measure=nothing) swt2 = SpinWaveTheory(sys_reshape; measure=nothing) q = [0.5, -0.1, 0.3] disp1 = dispersion(swt1, [q]) disp2 = dispersion(swt2, [q]) - + @test disp1 ≈ [1.3236778213351734, 0.9206655419611791] - @test disp2 ≈ [0.9206655419611772] + @test disp2 ≈ [0.9206655419611772] end end @@ -359,7 +359,7 @@ end swt = SpinWaveTheory(sys; measure) q = [0.41568,0.56382,0.76414] res = intensities_bands(swt, [q]) - + SpinW_energies = [2.6267,2.6541,2.8177,2.8767,3.2458,3.3172,3.4727,3.7767,3.8202,3.8284,3.8749,3.9095,3.9422,3.9730,4.0113,4.0794,4.2785,4.4605,4.6736,4.7564,4.7865] SpinW_intensities = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2999830079, -0.2999830079im, 0,0.2999830079im, 0.2999830079, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.3591387785, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5954018134, -0.5954018134im, 0,0.5954018134im, 0.5954018134, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.3708506016,1.3708506016im, 0, -1.3708506016im, 1.3708506016, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0511743697, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0734875342, 0.0 + 0.0734875342im, 0, 0.0 - 0.0734875342im, 0.0734875342, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0577275935, -0.0577275935im, 0,0.0577275935im, 0.0577275935, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.1733740706, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0338873034,0.0338873034im, 0, -0.0338873034im, 0.0338873034, 0, 0, 0, 0] @@ -384,7 +384,7 @@ end h = 2.0*J₃ set_field!(sys, [0, 0, h]) - + dipole_array = zeros(Float64,3,7,7) dipole_array[:,:,1]= [0.856769 0.811926 -0.485645 -0.950288 -0.429787 -0.0800935 0.217557; 0.35333 -0.2906 0.0685251 -0.297433 -0.854291 -0.004611 0.97169; @@ -410,7 +410,7 @@ end for x in 1:7, y in 1:7 set_dipole!(sys, dipole_array[:,x,y], (x,y,1,1)) end - + q1 = [0.0391,0.0415,0.5530] q2 = [0.2360,0.7492,0.9596] q3 = [0.1131,0.7654,0.2810] @@ -428,7 +428,7 @@ end end -@testitem "Invariance to reshaping" begin +@testitem "Invariance to reshaping" begin # Diamond-cubic with antiferromagnetic exchange latvecs = lattice_vectors(1, 1, 1, 90, 90, 90) cryst = Crystal(latvecs, [[0,0,0]], 227; choice="1") @@ -438,12 +438,12 @@ end randomize_spins!(sys) minimize_energy!(sys) @test energy_per_site(sys) ≈ -2s^2 - + # Reshaped system shape = [0 1 1; 1 0 1; 1 1 0] / 2 sys_prim = reshape_supercell(sys, shape) @test energy_per_site(sys_prim) ≈ -2s^2 - + # Both systems should produce the same intensities formfactors = [1 => FormFactor("Co2")] swt1 = SpinWaveTheory(sys_prim; measure=ssf_perp(sys_prim; formfactors)) @@ -594,7 +594,7 @@ end using LinearAlgebra function dimer_model(; J=1.0, J′=0.0, h=0.0, dims=(2,1,1), fast=false) - cryst = Crystal(I(3), [[0,0,0]], 1) + cryst = Crystal(I(3), [[0,0,0]], 1) sys = System(cryst, [1 => Moment(s=3/2, g=1)], :SUN; dims) S = spin_matrices(1/2) @@ -605,13 +605,13 @@ end S1i, S1j = Sunny.to_product_space(S1, S1) S2i, S2j = Sunny.to_product_space(S2, S2) scalar, bilin, biquad, tensordec = Sunny.decompose_general_coupling(J′*(S1i' * S1j + S2i' * S2j), 4, 4; extract_parts=fast) - Sunny.set_pair_coupling_aux!(sys, scalar, Sunny.Mat3(I)*bilin, biquad, tensordec, Bond(1, 1, [-1,0,0])) + Sunny.set_pair_coupling_aux!(sys, scalar, Sunny.Mat3(I)*bilin, biquad, tensordec, Bond(1, 1, [-1,0,0])) return sys, cryst end # Set up dimer model and observables (0 and π channels manually specified) - sys, cryst = dimer_model(; J=1.0, J′=0.2, h=0.0, dims=(2,1,1), fast=false) + sys, cryst = dimer_model(; J=1.0, J′=0.2, h=0.0, dims=(2,1,1), fast=false) s = spin_matrices(1/2) S1, S2 = Sunny.to_product_space(s, s) observables0 = Hermitian.([