SpeedyWeather · maximilian-gelbrecht · Jan 3, 2025 · Jan 7, 2025 · Jan 9, 2025 · Jan 9, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## Unreleased
 
+- Differentiability tests for timestepping added [#656](https://github.com/SpeedyWeather/SpeedyWeather.jl/pull/656)
 - Interfaces for interpolation of AbstractGridArray [#671](https://github.com/SpeedyWeather/SpeedyWeather.jl/pull/671)
 - Test folder sorted into subfolders [#671](https://github.com/SpeedyWeather/SpeedyWeather.jl/pull/671)
 - Land model modularised + land netCDF output [#671](https://github.com/SpeedyWeather/SpeedyWeather.jl/pull/671)

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6"
 NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
 UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"

diff --git a/docs/make.jl b/docs/make.jl
@@ -20,6 +20,7 @@ makedocs(
                 "Stochastic physics" => "stochastic_physics.md",
                 "Analysis"=>"analysis.md",
                 "Tree structure"=>"structure.md",
+                "Differentiability and Adjoint Model"=>"differentiability.md",
                 "NetCDF output"=>"output.md",
             ],
             "Extending SpeedyWeather" => [

diff --git a/docs/src/differentiability.md b/docs/src/differentiability.md
@@ -0,0 +1,57 @@
+# Differentiability and Adjoint Model
+
+SpeedyWeather.jl is written with differentiability in mind. This means that our model is differentiable by automatic differentiation (AD). If you are interested in machine learning (ML), this means that you can integrate our model directly into your ML models without the need to train your artificial neural networks (ANNs) separately first. For atmospheric modellers this means that you get an adjoint model for free which is always generated automatically, so that we don't need to maintain it separately. So, you can calibrate SpeedyWeather.jl in a fully automatic, data-driven way.
+
+!!! warning "Work in progress"
+    The differentiability of SpeedyWeather.jl is still work in progress and some parts of this documentation might not always be up to date with the latest state. We will extend this documentation over time. Don't hesitate to contact us via GitHub issues or mail when you have questions or want to collaborate.
+
+For the differentiability of our model we rely on [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl). If you've used Enzyme before, just go ahead and try to differentiate the model! It should work. We have checked the correctness of the gradients extensively against a finite differences differentiation with [FiniteDifferences.jl](https://github.com/JuliaDiff/FiniteDifferences.jl/). In the following we present a simple example of how we can take the gradient of a single timestep of the primitive equation model with respect to one of the model parameters.
+
+!!! info "Enzyme with Julia 1.11"
+    Currently there are still some issues with Enzyme in Julia 1.11, so we recommend using Julia 1.10 for the following.
+
+First we initialize the model as usual: 
+
+```@example autodiff
+using SpeedyWeather, Enzyme 
+
+spectral_grid = SpectralGrid(trunc=23, nlayers=3)           
+model = PrimitiveWetModel(; spectral_grid) 
+simulation = initialize!(model)  
+initialize!(simulation)
+run!(simulation, period=Day(10)) # spin-up the model a bit
+```
+
+Then, we get all variables we need from our `simulation`
+
+```@example autodiff
+(; prognostic_variables, diagnostic_variables, model) = simulation
+(; Δt, Δt_millisec) = model.time_stepping
+dt = 2Δt
+
+progn = prognostic_variables
+diagn = diagnostic_variables
+```
+
+Next, we will prepare to use Enzyme. Enzyme saves the gradient information in a shadow of the original input. For the inputs this shadow is initialized to zero, whereas for the output the shadow is used as the seed of the AD. In other words, as we are doing reverse-mode AD, the shadow of the output is the value that is backpropagated by the reverse-mode AD. Ok, let's initialize everything:
+
+```@example autodiff 
+dprogn = one(progn) # shadow for the progn values 
+ddiagn = make_zero(diagn) # shadow for the diagn values 
+dmodel = make_zero(model) # here, we'll accumulate all parameter derivatives 
+```
+
+Then, we can already do the differentiation with Enzyme
+
+```@example autodiff 
+autodiff(Reverse, SpeedyWeather.timestep!, Const, Duplicated(progn, dprogn), Duplicated(diagn, ddiagn), Const(dt), Duplicated(model, dmodel))
+```
+
+The derivatives are accumulated in the `dmodel` shadow. So, if we e.g. want to know the derivative with respect to the gravity constant, we just have to inspect:
+
+```@example autodiff 
+dmodel.planet.gravity 
+```
+
+Doing a full sensitivity analysis through a long integration is computationally much more demanding, and is something that we are currently working on. 
+
diff --git a/ext/SpeedyWeatherEnzymeExt.jl b/ext/SpeedyWeatherEnzymeExt.jl
@@ -2,6 +2,8 @@ module SpeedyWeatherEnzymeExt
 
 using SpeedyWeather
 using Enzyme
+using Enzyme.EnzymeCore
+using SpeedyWeather.ProgressMeter
 import .EnzymeRules: reverse, augmented_primal
 using .EnzymeRules
 
@@ -17,23 +19,23 @@ function adjoint_scale(S::SpectralTransform)
     (; nlat_half, nlons, rfft_plans) = S
     nfreqs = [rfft_plan.osz[1] for rfft_plan in rfft_plans] # TODO: This works with FFTW, but does it with cuFFT as well?
 
-    scale = zeros(Int, maximum(nfreqs), nlat_half) 
+    scale = zeros(Int, maximum(nfreqs), 1, nlat_half) # the scratch memory is (Freq x lvl x lat), so we insert 
+                                                      # an additional dimension here for easier matrix multiply
 
     for i=1:nlat_half
-        scale[1:nfreqs[i],i] = rfft_adjoint_scale(nfreqs[i], nlons[i])
+        scale[1:nfreqs[i],1,i] = rfft_adjoint_scale(nfreqs[i], nlons[i])
     end 
 
-    # TODO: transfer array to GPU in case we are on GPU
-    return reshape(scale, maximum(nfreqs), 1, nlat_half) # the scratch memory is (Freq x lvl x lat), so we insert 
-                                                         # an additional dimension here for easier matrix multiply
+    # TODO: transfer array to GPU in case we are on GPU?
+    return scale
 end 
 
 # Computes the scale for the adjoint/pullback of a real discrete fourier transform.
 function rfft_adjoint_scale(n_freq::Int, n_real::Int)
     if iseven(n_real)
-        return [1; [2 for i=2:(n_freq-1)]; 1]
+        return [1 < i < n_freq ? 2 : 1 for i=1:n_freq]
     else 
-        return [1; [2 for i=2:n_freq]]
+        return [1 < i ? 2 : 1 for i=1:n_freq]
     end 
 end 
 
@@ -112,4 +114,17 @@ function reverse(config::EnzymeRules.RevConfigWidth{1}, func::Const{typeof(_four
     return (nothing, nothing, nothing, nothing)
 end
 
+###
+# implement make_zero where the default one fails
+
+# ProgressCore, with its lock, is part of the ProgressMeter that's part of the Feedback of all models
+@inline function Enzyme.make_zero(
+    ::Type{ProgressMeter.ProgressCore}, 
+    seen::IdDict, 
+    prev::ProgressMeter.ProgressCore, 
+    ::Val{copy_if_inactive} = Val(false),
+)::ProgressMeter.ProgressCore where {copy_if_inactive} 
+    return prev     # `prev` itself is returned, `seen` and `copy_if_inactive` are not used
+end
+
 end
diff --git a/ext/SpeedyWeatherFiniteDifferencesExt.jl b/ext/SpeedyWeatherFiniteDifferencesExt.jl
@@ -27,4 +27,91 @@ function FiniteDifferences.to_vec(x::LTA) where LTA <: LowerTriangularArray
     return x_vec, LowerTriangularArray_from_vec
 end
 
+# Vector{Particle} needs its own method because the generic to_vec yields Any[] for an empty vector of Particle (which isn't the case for number types)
+function FiniteDifferences.to_vec(x::Vector{Particle{NF}}) where NF
+    if isempty(x)
+        # no entries to vary, but the back function must still return a typeof(x), not a Vector{NF}
+        Empty_from_vec(x_vec) = similar(x, 0)
+        return NF[], Empty_from_vec
+    end
+    # non-empty x: the unmodified to_vec(::DenseVector), minus its (here unreachable) empty branch
+    x_vecs_and_backs = map(to_vec, x)
+    x_vecs, backs = first.(x_vecs_and_backs), last.(x_vecs_and_backs)
+    sz = cumsum(map(length, x_vecs))    # last index of each element's slice, computed only once
+    function Vector_from_vec(x_vec)
+        x_Vec = [backs[n](x_vec[sz[n] - length(x_vecs[n]) + 1:sz[n]]) for n in eachindex(x)]
+        return oftype(x, x_Vec)
+    end
+    return reduce(vcat, x_vecs), Vector_from_vec
+end
+
+# Variant of the generic struct fallback of FiniteDifferences.to_vec that skips the fields
+# we don't want to be varied in our big data structures; NaNs, which are expected
+# in the land and ocean variables, are replaced as well
+function FiniteDifferences.to_vec(x::T) where {T <: Union{PrognosticVariables, PrognosticVariablesOcean, PrognosticVariablesLand, DiagnosticVariables, Tendencies, GridVariables, DynamicsVariables, PhysicsVariables, ParticleVariables}}
+
+    names_pre, names_varied, names_post = determine_included_fields(T)
+
+    # excluded fields are carried over unchanged when the struct is rebuilt
+    kept_pre = map(name -> getfield(x, name), names_pre)
+    kept_post = map(name -> getfield(x, name), names_post)
+
+    # flatten every included field, remembering how to undo the flattening
+    field_vecs_and_backs = map(name -> to_vec(getfield(x, name)), names_varied)
+    field_vecs = first.(field_vecs_and_backs)
+    field_backs = last.(field_vecs_and_backs)
+
+    flat, split_flat = to_vec(field_vecs)
+    flat = replace_NaN(x, flat)
+
+    function structtype_from_vec(v::Vector{<:Real})
+        rebuilt = map((back, part) -> back(part), field_backs, split_flat(v))
+        return T(kept_pre..., rebuilt..., kept_post...)
+    end
+    return flat, structtype_from_vec
+end
+
+# Split the field names of T into (excluded before, included, excluded after) the varied fields.
+# NOTE(review): any excluded field after the first included one lands in the last list, so this presumably expects the included fields to be contiguous in T
+function determine_included_fields(T::Type)
+    # field types whose values are included, every other field is excluded
+    included_field_types = Union{SpeedyWeather.AbstractDiagnosticVariables,
+    SpeedyWeather.AbstractPrognosticVariables, SpeedyWeather.ColumnVariables,
+    NTuple, Dict{Symbol, <:Tuple}, Dict{Symbol, <:AbstractArray}, AbstractArray}
+
+    # field names are Symbols, so use concretely typed vectors instead of Any[]
+    excluded_fields_pre = Symbol[]
+    included_fields = Symbol[]
+    excluded_fields_post = Symbol[]
+
+    for name in fieldnames(T)
+        if fieldtype(T, name) <: included_field_types
+            push!(included_fields, name)
+        elseif isempty(included_fields)     # no included field seen yet
+            push!(excluded_fields_pre, name)
+        else
+            push!(excluded_fields_post, name)
+        end
+    end
+
+    return excluded_fields_pre, included_fields, excluded_fields_post
+end
+
+# FiniteDifferences can't deal with the NaNs found in the ocean and land variables, so they are set to zero (in place)
+function replace_NaN(x_type::T, vec) where {T <: Union{PrognosticVariablesOcean, PrognosticVariablesLand, PhysicsVariables}}
+    nan_positions = findall(isnan, vec)
+    vec[nan_positions] .= 0
+    return vec
+end
+
+# fallback for every other type: the vector is returned untouched
+replace_NaN(type, vec) = vec
+
+# By default FiniteDifferences doesn't include this, even though Integers can't be varied
+# (there's an old GitHub issue and PR about this): no entries are contributed and the back function returns `x` itself
+function FiniteDifferences.to_vec(x::Integer)
+    Integer_from_vec(v) = x
+    return Bool[], Integer_from_vec
+end
+
 end 
diff --git a/src/LowerTriangularMatrices/lower_triangular_array.jl b/src/LowerTriangularMatrices/lower_triangular_array.jl
@@ -123,6 +123,8 @@ function Base.array_summary(io::IO, L::LowerTriangularMatrix{T}, inds::Tuple{Var
     print(io, Base.dims2string(length.(inds)), ", $(mn[1])x$(mn[2]) LowerTriangularMatrix{$T}")
 end
 
+@inline Base.dataids(L::LowerTriangularArray) = Base.dataids(L.data)    # forward to the wrapped data array
+
 # CREATE INSTANCES (ZEROS, ONES, UNDEF)
 for f in (:zeros, :ones, :rand, :randn)
     @eval begin

diff --git a/src/RingGrids/general.jl b/src/RingGrids/general.jl
@@ -22,6 +22,9 @@ nonparametric_type(grid::AbstractGridArray) = nonparametric_type(typeof(grid))
 # also needed for other array types, defined in extensions
 nonparametric_type(::Type{<:Array}) = Array
 
+# needed for unalias: forward to the data array wrapped by the grid
+@inline Base.dataids(grid::AbstractGridArray) = Base.dataids(grid.data)
+
 """$(TYPEDSIGNATURES) Full grid array type for `grid`. Always returns the N-dimensional `*Array`
 not the two-dimensional (`N=1`) `*Grid`. For reduced grids the corresponding full grid that
 share the same latitudes."""
@@ -427,7 +430,6 @@ end
 ## BROADCASTING
 # following https://docs.julialang.org/en/v1/manual/interfaces/#man-interfaces-broadcasting
 import Base.Broadcast: BroadcastStyle, Broadcasted, DefaultArrayStyle
-import LinearAlgebra: isstructurepreserving, fzeropreserving
 
 # {1} as grids are <:AbstractVector, Grid here is the non-parameteric Grid type!
 struct AbstractGridArrayStyle{N, Grid} <: Broadcast.AbstractArrayStyle{N} end
@@ -440,7 +442,7 @@ Base.BroadcastStyle(::Type{Grid}) where {Grid<:AbstractGridArray{T, N, ArrayType
 
 # allocation for broadcasting, create a new Grid with undef of type/number format T
 function Base.similar(bc::Broadcasted{AbstractGridArrayStyle{N, Grid}}, ::Type{T}) where {N, Grid, T}
-    return Grid(Array{T}(undef, size(bc)...))
+    return Grid(Array{T}(undef, size(bc)))
 end
 
 # ::Val{0} for broadcasting with 0-dimensional, ::Val{1} for broadcasting with vectors, etc
@@ -485,7 +487,7 @@ function Base.similar(
     ::Type{T},
 ) where {N, ArrayType, Grid, T}
     ArrayType_ = nonparametric_type(ArrayType)
-    return Grid(ArrayType_{T}(undef, size(bc)...))
+    return Grid(ArrayType_{T}(undef, size(bc)))
 end
 
 function Adapt.adapt_structure(to, grid::Grid) where {Grid <: AbstractGridArray}

diff --git a/src/dynamics/clock.jl b/src/dynamics/clock.jl
@@ -38,6 +38,18 @@ function Base.show(io::IO, C::Clock)
     print_fields(io, C, keys)
 end
 
+# copy! the fields listed below from `clock_old` into `clock` (in place)
+function Base.copy!(clock::Clock, clock_old::Clock)
+    clock.time = clock_old.time
+    clock.start = clock_old.start
+    clock.period = clock_old.period
+    clock.timestep_counter = clock_old.timestep_counter
+    clock.n_timesteps = clock_old.n_timesteps
+    clock.Δt = clock_old.Δt
+
+    return clock    # Base.copy!(dst, src) is documented to return dst
+end
+
 """
 $(TYPEDSIGNATURES)
 Initialize the clock with the time step `Δt` in the `time_stepping`."""

diff --git a/src/dynamics/implicit.jl b/src/dynamics/implicit.jl
@@ -209,12 +209,12 @@ Initialize the implicit terms for the PrimitiveEquation models."""
 function initialize!(   
     implicit::ImplicitPrimitiveEquation,
     dt::Real,                                           # the scaled time step radius*dt
-    diagn::DiagnosticVariables,
+    diagn::DiagnosticVariables{NF},
     geometry::AbstractGeometry,
     geopotential::AbstractGeopotential,
     atmosphere::AbstractAtmosphere,
     adiabatic_conversion::AbstractAdiabaticConversion,
-)
+) where NF
 
     (; trunc, nlayers, α, temp_profile, S, S⁻¹, L, R, U, W, L0, L1, L2, L3, L4) = implicit
     (; σ_levels_full, σ_levels_thick) = geometry
@@ -267,10 +267,10 @@ function initialize!(
 
         for r in 1:nlayers
             L1[k, r] = ΔT_below*σ_levels_thick[r]*σₖ         # vert advection operator below
-            L1[k, r] -= k>=r ? σ_levels_thick[r] : 0
+            L1[k, r] -= k>=r ? σ_levels_thick[r] : zero(NF)
 
             L1[k, r] += ΔT_above*σ_levels_thick[r]*σₖ_above   # vert advection operator above
-            L1[k, r] -= (k-1)>=r ? σ_levels_thick[r] : 0
+            L1[k, r] -= (k-1)>=r ? σ_levels_thick[r] : zero(NF)
         end
 
         # _sum_above operator itself

diff --git a/src/dynamics/particles.jl b/src/dynamics/particles.jl
@@ -48,6 +48,9 @@ function Base.zeros(ArrayType::Type{<:AbstractArray{P}}, n::Int...) where {P<:Pa
     fill!(z, zero(P))
 end
 
+Base.eltype(::Type{<:Particle{NF}}) where NF = NF   # <: so that concrete types like Particle{NF,true} match too
+Base.eltype(::Particle{NF}) where NF = NF
+
 Base.rand(rng::Random.AbstractRNG, ::Random.Sampler{Particle}) = rand(rng, Particle{DEFAULT_NF,true})
 Base.rand(rng::Random.AbstractRNG, ::Random.Sampler{Particle{NF}}) where NF = rand(rng, Particle{NF,true})