Merge branch 'main' of github.com:gher-ulg/DINCAE.jl

gher-uliege · Jan 21, 2025 · ddf94ca · ddf94ca
2 parents 9d2230a + 269492f
commit ddf94ca
Show file tree

Hide file tree

Showing 7 changed files with 128 additions and 110 deletions.
diff --git a/.github/workflows/ci.yml → .github/workflows/CI.yml b/.github/workflows/ci.yml → .github/workflows/CI.yml
@@ -3,33 +3,30 @@ on:
   pull_request:
   push:
     branches:
+      - '*'
       - main
       - cycles
       - dev
     tags: '*'
 jobs:
   test:
-    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ github.event_name }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
         version:
           - '1'
-#          - 'nightly'
         os:
           - ubuntu-latest
           - macOS-latest
           - windows-latest
-        arch:
-          - x64
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
-          arch: ${{ matrix.arch }}
-      - uses: actions/cache@v1
+      - uses: actions/cache@v4
         env:
           cache-name: cache-artifacts
         with:

diff --git a/CITATION.cff b/CITATION.cff
@@ -4,7 +4,7 @@ authors:
 - family-names: "Barth"
   given-names: "Alexander"
   orcid: "https://orcid.org/0000-0003-2952-5997"
-title: gher-uliege/DINCAE.jl: v2.0.2
+title: "gher-uliege/DINCAE.jl: v2.0.2"
 version: v2.0.2
 doi: 10.5281/zenodo.8032127 
 date-released: 2023-06-13

diff --git a/README.md b/README.md
@@ -2,10 +2,8 @@
 [![codecov](https://codecov.io/github/gher-uliege/DINCAE.jl/graph/badge.svg?token=2FXNLLMNEF)](https://codecov.io/github/gher-uliege/DINCAE.jl)
 [![documentation stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://gher-uliege.github.io/DINCAE.jl/stable/)
 [![documentation dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://gher-uliege.github.io/DINCAE.jl/dev/)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5575066.svg)](https://doi.org/10.5281/zenodo.5575066)
 
-[![Issues](https://img.shields.io/github/issues-raw/gher-uliege/DINCAE.jl?style=plastic)](https://github.com/gher-uliege/DINCAE.jl/issues)
-![Issues](https://img.shields.io/github/commit-activity/m/gher-uliege/DINCAE.jl)
-![Commit](https://img.shields.io/github/last-commit/gher-uliege/DINCAE.jl) ![GitHub top language](https://img.shields.io/github/languages/top/gher-uliege/DINCAE.jl)
 
 # DINCAE.jl
 

diff --git a/ext/AMDGPUExt.jl b/ext/AMDGPUExt.jl
@@ -34,16 +34,16 @@ function interpnd_d!(pos::AbstractVector{<:NTuple{N}},A,vec) where N
     return nothing
 end
 
-function interpnd!(pos::AbstractVector{<:NTuple{N}},A_d::ROCArray,vec_d) where N
+# Host-side launcher of the `interpnd_d!` kernel for AMD GPUs (this method is
+# selected by `A::ROCArray`). The launch is wrapped in `AMDGPU.@sync`.
+#
+# An empty `pos` returns `nothing` right away: the group size would otherwise
+# be `min(0, config.groupsize) == 0` and `cld(len, 0)` throws a `DivideError`.
+function interpnd!(pos::AbstractVector{<:NTuple{N}},A::ROCArray,vec) where N
+    isempty(pos) && return nothing
+
     AMDGPU.@sync begin
         len = length(pos)
-        kernel = @roc launch=false interpnd_d!(pos,A_d,vec_d)
+        # compile the kernel without launching it, so that a launch
+        # configuration can be queried for it
+        kernel = @roc launch=false interpnd_d!(pos,A,vec)
         config = AMDGPU.launch_configuration(kernel)
+        # never more work-items per group than there are positions
         groupsize = min(len, config.groupsize)
+        # number of groups needed to cover all positions
         gridsize = cld(len, groupsize)
         @debug gridsize,groupsize
 
-        kernel(pos,A_d,vec_d; groupsize, gridsize)
+        kernel(pos,A,vec; groupsize, gridsize)
     end
 end
 
@@ -80,15 +80,15 @@ function interp_adjn_d!(pos::AbstractVector{<:NTuple{N}},values,B) where N
 end
 
 
-function interp_adjn!(pos::AbstractVector{<:NTuple{N}},values_d::ROCArray,B_d) where N
-    B_d .= 0
+# Host-side launcher of the `interp_adjn_d!` kernel for AMD GPUs (this method
+# is selected by `values::ROCArray`). `B` is overwritten: it is set to zero
+# before the kernel is launched. The launch is wrapped in `AMDGPU.@sync`.
+function interp_adjn!(pos::AbstractVector{<:NTuple{N}},values::ROCArray,B) where N
+    B .= 0
 
     AMDGPU.@sync begin
+        # NOTE(review): `len` is not used in the rest of this function
         len = length(pos)
         #numgridsize = ceil(Int, length(pos)/256)
         # must be one
+        # NOTE(review): the reason for the single-group restriction is not
+        # stated here -- confirm against the kernel before changing it
         numgridsize = 1
-        @roc groupsize=256 gridsize=numgridsize interp_adjn_d!(pos,values_d,B_d)
+        @roc groupsize=256 gridsize=numgridsize interp_adjn_d!(pos,values,B)
     end
 end
 

diff --git a/ext/CUDAExt.jl b/ext/CUDAExt.jl
@@ -11,7 +11,8 @@ function interpnd_d!(pos::AbstractVector{<:NTuple{N}},A,vec) where N
 
     @inbounds for i = index:stride:length(pos)
         p = pos[i]
-        ind = floor.(Int,p)
+        #ind = floor.(Int,p)
+        ind = unsafe_trunc.(Int32,floor.(p))
 
         # interpolation coefficients
         c = p .- ind
@@ -27,27 +28,28 @@ function interpnd_d!(pos::AbstractVector{<:NTuple{N}},A,vec) where N
     return nothing
 end
 
-function interpnd!(pos::AbstractVector{<:NTuple{N}},d_A::CuArray,vec_d) where N
+# Host-side launcher of the `interpnd_d!` kernel for NVIDIA GPUs (this method
+# is selected by `A::CuArray`). The launch is wrapped in `CUDA.@sync`.
+#
+# An empty `pos` returns `nothing` right away: the number of threads would
+# otherwise be `min(0, config.threads) == 0` and `cld(len, 0)` throws a
+# `DivideError`.
+function interpnd!(pos::AbstractVector{<:NTuple{N}},A::CuArray,vec) where N
+    isempty(pos) && return nothing
+
     CUDA.@sync begin
         len = length(pos)
-        kernel = @cuda launch=false interpnd_d!(pos,d_A,vec_d)
+        # compile the kernel without launching it, so that a launch
+        # configuration can be queried for it
+        kernel = @cuda launch=false interpnd_d!(pos,A,vec)
         config = launch_configuration(kernel.fun)
+        # never more threads per block than there are positions
         threads = min(len, config.threads)
+        # number of blocks needed to cover all positions
         blocks = cld(len, threads)
         @debug blocks,threads
 
-        kernel(pos,d_A,vec_d; threads, blocks)
+        kernel(pos,A,vec; threads, blocks)
     end
 end
 
 
-function interp_adjn_d!(pos::AbstractVector{<:NTuple{N}},values,A2) where N
+function interp_adjn_d!(pos::AbstractVector{<:NTuple{N}},values,B) where N
     index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
     stride = gridDim().x * blockDim().x
 
     @inbounds for i = index:stride:length(pos)
         p = pos[i]
-        ind = floor.(Int,p)
+        #ind = floor.(Int,p)
+        ind = unsafe_trunc.(Int32,floor.(p))
 
         # interpolation coefficients
         c = p .- ind
@@ -57,24 +59,24 @@ function interp_adjn_d!(pos::AbstractVector{<:NTuple{N}},values,A2) where N
 
             cc = prod(ntuple(n -> (offset[n] == 1 ? c[n] : 1-c[n]),Val(N)))
 
-            I = LinearIndices(A2)[p2...]
-            CUDA.atomic_add!(pointer(A2,I), values[i] * cc)
+            I = LinearIndices(B)[p2...]
+            CUDA.atomic_add!(pointer(B,I), values[i] * cc)
         end
     end
 
     return nothing
 end
 
 
-function interp_adjn!(pos::AbstractVector{<:NTuple{N}},cuvalues::CuArray,d_A2) where N
-    A2 .= 0
+# Host-side launcher of the `interp_adjn_d!` kernel for NVIDIA GPUs (this
+# method is selected by `values::CuArray`). `B` is overwritten: it is set to
+# zero first because the kernel adds its contributions to `B` with
+# `CUDA.atomic_add!`. The launch is wrapped in `CUDA.@sync`.
+function interp_adjn!(pos::AbstractVector{<:NTuple{N}},values::CuArray,B) where N
+    B .= 0
 
     CUDA.@sync begin
+        # NOTE(review): `len` is not used in the rest of this function
         len = length(pos)
         #numblocks = ceil(Int, length(pos)/256)
         # must be one
+        # NOTE(review): the reason for the single-block restriction is not
+        # stated here -- confirm against the kernel before changing it
         numblocks = 1
-        @cuda threads=256 blocks=numblocks interp_adjn_d!(pos,cuvalues,d_A2)
+        @cuda threads=256 blocks=numblocks interp_adjn_d!(pos,values,B)
     end
 end
 

diff --git a/src/model.jl b/src/model.jl
@@ -285,7 +285,84 @@ function recmodel4(sz,enc_nfilter,dec_nfilter,skipconnections,l=1; method = :nea
     end
 end
 
+"""
+    model = genmodel(sz,ninput,noutput; kwargs...)
+
+Assemble the encoder/decoder network(s) with `recmodel4` and wrap them in a
+`StepModel`, which is returned.
+
+# Arguments
+
+* `sz`: size handed to `recmodel4` (`reconstruct` passes `sz[1:end-2]` of the
+  input array).
+* `ninput`: number of filters of the first encoder layer (`reconstruct` passes
+  `sz[end-1]` of the input array).
+* `noutput`: number of output variables; only used when `output_ndims == 1`,
+  where the first decoder entry is `2*noutput`.
+
+# Keywords
+
+* `enc_nfilter_internal`: number of filters of the internal layers, appended
+  to the first encoder entry and to the first decoder entry.
+* `skipconnections`, `upsampling_method`: forwarded to `recmodel4` (the latter
+  as `method`).
+* `loss_weights_refine`: if equal to `(1.,)` a single network is built,
+  otherwise a second (refinement) network is added whose first encoder entry
+  is increased by the first decoder entry.
+* `min_std_err`: converted to `gamma = log(min_std_err^(-2))` for `StepModel`.
+* `output_ndims`: for values other than 1, the first decoder entry is
+  `nscalar_per_obs_(output_ndims)`, the final layer is `identity` and the cost
+  function is `vector2_costfun` using `direction_obs`.
+* `truth_uncertain`, `regularization_L1_beta`, `regularization_L2_beta`,
+  `laplacian_penalty`, `laplacian_error_penalty`: forwarded to `StepModel`.
+"""
+function genmodel(sz,ninput,noutput;
+                  truth_uncertain = false,
+                  enc_nfilter_internal = [16,24,36,54],
+                  skipconnections = 2:(length(enc_nfilter_internal)+1),
+                  regularization_L1_beta = 0,
+                  regularization_L2_beta = 0,
+                  upsampling_method = :nearest,
+                  loss_weights_refine = (1.,),
+                  min_std_err = 0.006737946999085467,
+                  output_ndims = 1,
+                  direction_obs = nothing,
+                  laplacian_penalty = 0,
+                  laplacian_error_penalty = laplacian_penalty,
+                  )
+
+    enc_nfilter = vcat([ninput],enc_nfilter_internal)
+
+    if output_ndims == 1
+        dec_nfilter = vcat([2*noutput],enc_nfilter_internal)
+    else
+        # `noutput` is not used here
+        # 5 (u,v) and lower matrix of P
+        dec_nfilter = vcat([nscalar_per_obs_(output_ndims)],
+                           enc_nfilter_internal)
+    end
+
+    @info "Number of filters in encoder: $enc_nfilter"
+    @info "Number of filters in decoder: $dec_nfilter"
+
+    gamma = log(min_std_err^(-2))
+    @info "Gamma:             $gamma"
+
+    # NOTE(review): a single weight different from 1 (or a vector instead of a
+    # tuple) takes the two-network branch -- confirm that this is intended
+    if loss_weights_refine == (1.,)
+        steps = (DINCAE.recmodel4(sz,enc_nfilter,dec_nfilter,skipconnections;
+                                  method = upsampling_method),)
+    else
+        @info "Step model"
 
+        # the first encoder entry of the refinement network grows by the
+        # number of output filters of the first network (presumably because
+        # its output is appended to the input -- TODO confirm)
+        enc_nfilter2 = copy(enc_nfilter)
+        enc_nfilter2[1] += dec_nfilter[1]
+        dec_nfilter2 = copy(dec_nfilter)
+        @info "Number of filters in encoder (refinement): $enc_nfilter2"
+        @info "Number of filters in decoder (refinement): $dec_nfilter2"
+
+        steps = (DINCAE.recmodel4(sz,enc_nfilter,dec_nfilter,skipconnections; method = upsampling_method),
+                 DINCAE.recmodel4(sz,enc_nfilter2,dec_nfilter2,skipconnections; method = upsampling_method))
+    end
+
+    if output_ndims == 1
+        model = StepModel(
+            steps,loss_weights_refine,truth_uncertain,gamma;
+            regularization_L1 = regularization_L1_beta,
+            regularization_L2 = regularization_L2_beta,
+            laplacian_penalty,
+            laplacian_error_penalty,
+        )
+    else
+        model = StepModel(
+            steps,loss_weights_refine,truth_uncertain,gamma;
+            final_layer = identity,
+            costfun = (xrec,xtrue) -> vector2_costfun(xrec,xtrue,truth_uncertain,direction_obs),
+            regularization_L1 = regularization_L1_beta,
+            regularization_L2 = regularization_L2_beta,
+            laplacian_penalty,
+            laplacian_error_penalty,
+        )
+    end
+
+    return model
+end
 
 
 """
@@ -366,6 +443,7 @@ function reconstruct(Atype,data_all,fnames_rec;
         error("No output will be saved. Consider to adjust save_epochs (currently $save_epochs) or epochs (currently $epochs).")
     end
 
+    device = _to_device(Atype)
     varname = data_all[1][1].varname
 
     @info "Number of threads: $(Threads.nthreads())"
@@ -408,15 +486,12 @@ function reconstruct(Atype,data_all,fnames_rec;
         output_varnames = all_varnames[train_data.isoutput]
         # number of output variables
         noutput = sum(train_data.isoutput)
-        dec_nfilter = vcat([2*noutput],enc_nfilter_internal)
     else
         @assert output_ndims == 2
         output_varnames = ["u","v"]
 
         noutput = output_ndims
-        # 5 (u,v) and lower matrix of P
-        dec_nfilter = vcat([nscalar_per_obs_(output_ndims)],
-                           enc_nfilter_internal)
+        direction_obs = device(direction_obs)
     end
 
     @info "Output variables:  $output_varnames"
@@ -434,69 +509,29 @@ function reconstruct(Atype,data_all,fnames_rec;
     @info "Input size:        $(format_size(sz))"
     @info "Input sum:         $(sum(inputs_))"
 
-    nvar = sz[end-1]
-    enc_nfilter = vcat([nvar],enc_nfilter_internal)
-
-    @info "Number of filters in encoder: $enc_nfilter"
-    @info "Number of filters in decoder: $dec_nfilter"
-
-
-    gamma = log(min_std_err^(-2))
-    @info "Gamma:             $gamma"
-
-    @info "Number of filters: $enc_nfilter"
-    if loss_weights_refine == (1.,)
-        steps = (DINCAE.recmodel4(
-            sz[1:end-2],
-                enc_nfilter,
-                dec_nfilter,
-                skipconnections,
-                method = upsampling_method),)
-    else
-        println("Step model")
 
-        enc_nfilter2 = copy(enc_nfilter)
-        enc_nfilter2[1] += dec_nfilter[1]
-        dec_nfilter2 = copy(dec_nfilter)
-        @info "Number of filters in encoder (refinement): $enc_nfilter2"
-        @info "Number of filters in decoder (refinement): $dec_nfilter2"
-
-        steps = (DINCAE.recmodel4(sz[1:end-2],enc_nfilter,dec_nfilter,skipconnections; method = upsampling_method),
-                 DINCAE.recmodel4(sz[1:end-2],enc_nfilter2,dec_nfilter2,skipconnections; method = upsampling_method))
-    end
-
-    if output_ndims == 1
-        model = StepModel(
-            steps,loss_weights_refine,truth_uncertain,gamma;
-            regularization_L1 = regularization_L1_beta,
-            regularization_L2 = regularization_L2_beta,
-            laplacian_penalty = laplacian_penalty,
-            laplacian_error_penalty = laplacian_error_penalty,
-        )
-    else
-        model = StepModel(
-            steps,loss_weights_refine,truth_uncertain,gamma;
-            final_layer = identity,
-            costfun = (xrec,xtrue) -> vector2_costfun(xrec,xtrue,truth_uncertain,Atype(direction_obs)),
-            regularization_L1 = regularization_L1_beta,
-            regularization_L2 = regularization_L2_beta,
-            laplacian_penalty = laplacian_penalty,
-            laplacian_error_penalty = laplacian_error_penalty,
-        )
-    end
-
-    device = _to_device(Atype)
+    model = genmodel(sz[1:end-2],sz[end-1],noutput;
+                     enc_nfilter_internal,
+                     upsampling_method,
+                     skipconnections,
+                     min_std_err,
+                     loss_weights_refine,
+                     truth_uncertain,
+                     output_ndims,
+                     direction_obs,
+                     regularization_L1_beta, regularization_L2_beta,
+                     laplacian_penalty, laplacian_error_penalty)
 
     @info "using device:      $device"
     model = model |> device
 
-    xrec = model(Atype(inputs_))
+    xrec = model(device(inputs_))
     @info "Output size:       $(format_size(size(xrec)))"
     @info "Output range:      $(extrema(Array(xrec)))"
     @info "Output sum:        $(sum(xrec))"
 
-    #loss = model(Atype(inputs_), Atype(xtrue))
-    loss = loss_function(model,Atype(inputs_), Atype(xtrue))
+    #loss = model(device(inputs_), device(xtrue))
+    loss = loss_function(model,device(inputs_), device(xtrue))
     @info "Initial loss:      $loss"
 
     losses = typeof(loss)[]