JuliaOptimalTransport · devmotion · May 29, 2021 · May 28, 2021 · May 28, 2021 · May 28, 2021
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -0,0 +1,16 @@
+steps:
+  - label: "Julia v1"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: "1"
+      - JuliaCI/julia-test#v1:
+          coverage: false
+    agents:
+      queue: "juliagpu"
+      cuda: "*"
+    # Don't run Buildkite if the commit message includes the text [skip tests]
+    if: build.message !~ /\[skip tests\]/
+    timeout_in_minutes: 60
+
+env:
+  GROUP: GPU
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -49,6 +49,7 @@ jobs:
           coverage: ${{ matrix.coverage || false }}
         env:
           PYTHON: '' # Use Conda.jl also on Linux
+          GROUP: OptimalTransport
       - uses: julia-actions/julia-processcoverage@v1
         if: matrix.coverage
       - uses: codecov/codecov-action@v1

diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
@@ -13,4 +13,4 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
-        run: julia -e 'using CompatHelper; CompatHelper.main(; subdirs = ["", "docs", "examples/basic"])'
+        run: julia -e 'using CompatHelper; CompatHelper.main(; subdirs = ["", "docs", "docs/gpu", "examples/basic"])'
diff --git a/.gitignore b/.gitignore
@@ -27,6 +27,8 @@ docs/src/examples/
 # committed for packages, but should be committed for applications that require a static
 # environment.
 /Manifest.toml
+/test/Manifest.toml
+/test/gpu/Manifest.toml
 
 # Files generated by Jupyter Notebooks
 *.ipynb_checkpoints
diff --git a/Project.toml b/Project.toml
@@ -23,10 +23,12 @@ QuadGK = "2"
 julia = "1"
 
 [extras]
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 PythonOT = "3c485715-4278-42b2-9b5f-8f00e43c12ef"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa"
 
 [targets]
-test = ["Tulip", "PythonOT", "Random", "Test"]
+test = ["Pkg", "PythonOT", "Random", "SafeTestsets", "Test", "Tulip"]
diff --git a/test/entropic.jl b/test/entropic.jl
@@ -0,0 +1,146 @@
+using OptimalTransport
+
+using Distances
+using PythonOT: PythonOT
+
+using Random
+using Test
+
+const POT = PythonOT
+
+Random.seed!(100)
+
+@testset "entropic.jl" begin
+    @testset "sinkhorn" begin
+        M = 250
+        N = 200
+
+        @testset "example" begin
+            # create two uniform histograms
+            μ = fill(1 / M, M)
+            ν = fill(1 / N, N)
+
+            # create random cost matrix
+            C = pairwise(SqEuclidean(), rand(1, M), rand(1, N); dims=2)
+
+            # compute optimal transport map (Julia implementation + POT)
+            eps = 0.01
+            γ = sinkhorn(μ, ν, C, eps; maxiter=5_000, rtol=1e-9)
+            γ_pot = POT.sinkhorn(μ, ν, C, eps; numItermax=5_000, stopThr=1e-9)
+            @test γ_pot ≈ γ rtol = 1e-6
+
+            # compute optimal transport cost
+            c = sinkhorn2(μ, ν, C, eps; maxiter=5_000, rtol=1e-9)
+
+            # with regularization term
+            c_w_regularization = sinkhorn2(μ, ν, C, eps; maxiter=5_000, regularization=true)
+            @test c_w_regularization ≈ c + eps * sum(x -> iszero(x) ? x : x * log(x), γ)
+
+            # compare with POT
+            c_pot = POT.sinkhorn2(μ, ν, C, eps; numItermax=5_000, stopThr=1e-9)[1]
+            @test c_pot ≈ c
+
+            # ensure that provided map is used and correct
+            c2 = sinkhorn2(similar(μ), similar(ν), C, rand(); plan=γ)
+            @test c2 ≈ c
+            c2_w_regularization = sinkhorn2(
+                similar(μ), similar(ν), C, eps; plan=γ, regularization=true
+            )
+            @test c2_w_regularization ≈ c_w_regularization
+        end
+
+        # different element type
+        @testset "Float32" begin
+            # create two uniform histograms
+            μ = fill(Float32(1 / M), M)
+            ν = fill(Float32(1 / N), N)
+
+            # create random cost matrix
+            C = pairwise(SqEuclidean(), rand(Float32, 1, M), rand(Float32, 1, N); dims=2)
+
+            # compute optimal transport map (Julia implementation + POT)
+            eps = 0.01f0
+            γ = sinkhorn(μ, ν, C, eps; maxiter=5_000, rtol=1e-6)
+            @test eltype(γ) === Float32
+
+            γ_pot = POT.sinkhorn(μ, ν, C, eps; numItermax=5_000, stopThr=1e-6)
+            @test Float32.(γ_pot) ≈ γ rtol = 1e-3
+
+            # compute optimal transport cost
+            c = sinkhorn2(μ, ν, C, eps; maxiter=5_000, rtol=1e-6)
+            @test c isa Float32
+
+            # with regularization term
+            c_w_regularization = sinkhorn2(
+                μ, ν, C, eps; maxiter=5_000, rtol=1e-6, regularization=true
+            )
+            @test c_w_regularization ≈ c + eps * sum(x -> iszero(x) ? x : x * log(x), γ)
+
+            # compare with POT
+            c_pot = POT.sinkhorn2(μ, ν, C, eps; numItermax=5_000, stopThr=1e-6)[1]
+            @test Float32(c_pot) ≈ c rtol = 1e-3
+        end
+
+        @testset "deprecations" begin
+            # create two uniform histograms
+            μ = fill(1 / M, M)
+            ν = fill(1 / N, N)
+
+            # create random cost matrix
+            C = pairwise(SqEuclidean(), rand(1, M), rand(1, N); dims=2)
+
+            # check `sinkhorn2`
+            eps = 0.01
+            c = sinkhorn2(μ, ν, C, eps; atol=1e-6)
+            @test (@test_deprecated sinkhorn2(μ, ν, C, eps; tol=1e-6)) == c
+            c = sinkhorn2(μ, ν, C, eps; check_convergence=5)
+            @test (@test_deprecated sinkhorn2(μ, ν, C, eps; check_marginal_step=5)) == c
+
+            # check `sinkhorn_gibbs
+            K = @. exp(-C / eps)
+            γ = OptimalTransport.sinkhorn_gibbs(μ, ν, K; atol=1e-6)
+            @test (@test_deprecated OptimalTransport.sinkhorn_gibbs(μ, ν, K; tol=1e-6)) == γ
+            γ = OptimalTransport.sinkhorn_gibbs(μ, ν, K; check_convergence=5)
+            @test (@test_deprecated OptimalTransport.sinkhorn_gibbs(
+                μ, ν, K; check_marginal_step=5
+            )) == γ
+        end
+    end
+
+    @testset "sinkhorn_stabilized" begin
+        M = 250
+        N = 200
+
+        # create two uniform histograms
+        μ = fill(1 / M, M)
+        ν = fill(1 / N, N)
+
+        # create random cost matrix
+        C = pairwise(SqEuclidean(), rand(1, M), rand(1, N); dims=2)
+
+        # compute optimal transport map (Julia implementation + POT)
+        eps = 0.01
+        γ = sinkhorn_stabilized(μ, ν, C, eps)
+        γ_pot = POT.sinkhorn(μ, ν, C, eps; method="sinkhorn_stabilized")
+        @test γ ≈ γ_pot rtol = 1e-6
+    end
+
+    @testset "sinkhorn_barycenter" begin
+        # set up support
+        support = range(-1; stop=1, length=250)
+        μ1 = exp.(-(support .+ 0.5) .^ 2 ./ 0.1^2)
+        μ1 ./= sum(μ1)
+        μ2 = exp.(-(support .- 0.5) .^ 2 ./ 0.1^2)
+        μ2 ./= sum(μ2)
+        μ_all = hcat(μ1, μ2)'
+
+        # create cost matrix
+        C = pairwise(SqEuclidean(), support'; dims=2)
+
+        # compute Sinkhorn barycenter (Julia implementation + POT)
+        eps = 0.01
+        μ_interp = sinkhorn_barycenter(μ_all, [C, C], eps, [0.5, 0.5])
+        μ_interp_pot = POT.barycenter(μ_all', C, eps; weights=[0.5, 0.5], stopThr=1e-9)
+        @test μ_interp ≈ μ_interp_pot
+    end
+end
diff --git a/test/exact.jl b/test/exact.jl
@@ -0,0 +1,111 @@
+using OptimalTransport
+
+using Distances
+using PythonOT: PythonOT
+using Tulip
+using MathOptInterface
+using Distributions
+
+using LinearAlgebra
+using Random
+
+const MOI = MathOptInterface
+const POT = PythonOT
+
+Random.seed!(100)
+
+@testset "exact.jl" begin
+    @testset "Earth-Movers Distance" begin
+        M = 200
+        N = 250
+        μ = rand(M)
+        ν = rand(N)
+        μ ./= sum(μ)
+        ν ./= sum(ν)
+
+        @testset "example" begin
+            # create random cost matrix
+            C = pairwise(SqEuclidean(), rand(1, M), rand(1, N); dims=2)
+
+            # compute optimal transport map and cost with POT
+            pot_P = POT.emd(μ, ν, C)
+            pot_cost = POT.emd2(μ, ν, C)
+
+            # compute optimal transport map and cost with Tulip
+            lp = Tulip.Optimizer()
+            P = emd(μ, ν, C, lp)
+            @test size(C) == size(P)
+            @test MOI.get(lp, MOI.TerminationStatus()) == MOI.OPTIMAL
+            @test maximum(abs, P .- pot_P) < 1e-2
+
+            lp = Tulip.Optimizer()
+            cost = emd2(μ, ν, C, lp)
+            @test dot(C, P) ≈ cost atol = 1e-5
+            @test MOI.get(lp, MOI.TerminationStatus()) == MOI.OPTIMAL
+            @test cost ≈ pot_cost atol = 1e-5
+        end
+
+        @testset "pre-computed plan" begin
+            # create random cost matrix
+            C = pairwise(SqEuclidean(), rand(1, M), rand(1, N); dims=2)
+
+            # compute optimal transport map
+            P = emd(μ, ν, C, Tulip.Optimizer())
+
+            # do not use μ and ν to ensure that provided map is used
+            cost = emd2(similar(μ), similar(ν), C, Tulip.Optimizer(); plan=P)
+            @test cost ≈ emd2(μ, ν, C, Tulip.Optimizer())
+        end
+
+        # https://github.com/JuliaOptimalTransport/OptimalTransport.jl/issues/71
+        @testset "cost matrix with integers" begin
+            C = pairwise(SqEuclidean(), rand(1:10, 1, M), rand(1:10, 1, N); dims=2)
+            emd2(μ, ν, C, Tulip.Optimizer())
+        end
+    end
+
+    @testset "1D Optimal Transport for Convex Cost" begin
+        @testset "continuous distributions" begin
+            # two normal distributions (has analytical solution)
+            μ = Normal(randn(), rand())
+            ν = Normal(randn(), rand())
+
+            # compute OT plan
+            γ = ot_plan(sqeuclidean, μ, ν)
+            x = randn()
+            @test γ(x) ≈ quantile(ν, cdf(μ, x))
+
+            # compute OT cost
+            c = ot_cost(sqeuclidean, μ, ν)
+            @test c ≈ (mean(μ) - mean(ν))^2 + (std(μ) - std(ν))^2
+
+            # do not use ν to ensure that the provided plan is used
+            @test ot_cost(sqeuclidean, μ, Normal(randn(), rand()); plan=γ) ≈ c
+        end
+
+        @testset "semidiscrete case" begin
+            μ = Normal(randn(), rand())
+            νprobs = rand(30)
+            νprobs ./= sum(νprobs)
+            ν = Categorical(νprobs)
+
+            # compute OT plan
+            γ = ot_plan(euclidean, μ, ν)
+            x = randn()
+            @test γ(x) ≈ quantile(ν, cdf(μ, x))
+
+            # compute OT cost, without and with provided plan
+            # do not use ν in the second case to ensure that the provided plan is used
+            c = ot_cost(euclidean, μ, ν)
+            @test ot_cost(euclidean, μ, Categorical(reverse(νprobs)); plan=γ) ≈ c
+
+            # check that OT cost is consistent with OT cost of a discretization
+            m = 500
+            xs = rand(μ, m)
+            μdiscrete = fill(1 / m, m)
+            C = pairwise(Euclidean(), xs', (1:length(νprobs))'; dims=2)
+            c2 = emd2(μdiscrete, νprobs, C, Tulip.Optimizer())
+            @test c2 ≈ c rtol = 1e-1
+        end
+    end
+end
diff --git a/test/gpu/Project.toml b/test/gpu/Project.toml
@@ -0,0 +1,11 @@
+[deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+OptimalTransport = "7e02d93a-ae51-4f58-b602-d97af76e3b33"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[compat]
+CUDA = "3"
+Distances = "0.10"
+OptimalTransport = "0.3"
+julia = "1.6"
diff --git a/test/gpu/simple_gpu.jl b/test/gpu/simple_gpu.jl
@@ -0,0 +1,68 @@
+using OptimalTransport
+
+using CUDA
+using Distances
+
+using Random
+using Test
+
+CUDA.allowscalar(false)
+
+Random.seed!(100)
+
+@testset "simple_gpu.jl" begin
+    # ensure that a GPU is available
+    if !CUDA.functional()
+        @warn "skipped GPU tests: no GPU available"
+    else
+        @testset "sinkhorn" begin
+            # source histogram
+            m = 200
+            μ = rand(Float32, m)
+            μ ./= sum(μ)
+
+            # target histogram
+            n = 250
+            ν = rand(Float32, n)
+            ν ./= sum(ν)
+
+            # random cost matrix
+            C = pairwise(SqEuclidean(), randn(Float32, 1, m), randn(Float32, 1, n); dims=2)
+
+            # compute transport plan and cost on the GPU
+            ε = 0.01f0
+            γ = sinkhorn(cu(μ), cu(ν), cu(C), ε)
+            c = sinkhorn2(cu(μ), cu(ν), cu(C), ε)
+
+            # compare with results on the CPU
+            @test γ ≈ cu(sinkhorn(μ, ν, C, ε))
+            @test c ≈ cu(sinkhorn2(μ, ν, C, ε))
+        end
+
+        @testset "sinkhorn_unbalanced" begin
+            # source histogram
+            m = 200
+            μ = rand(Float32, m)
+            μ ./= 1.5f0 * sum(μ)
+
+            # target histogram
+            n = 250
+            ν = rand(Float32, n)
+            ν ./= sum(ν)
+
+            # random cost matrix
+            C = pairwise(SqEuclidean(), randn(Float32, 1, m), randn(Float32, 1, n); dims=2)
+
+            # compute transport plan and cost on the GPU
+            ε = 0.01f0
+            λ1 = 0.4f0
+            λ2 = 0.6f0
+            γ = sinkhorn_unbalanced(cu(μ), cu(ν), cu(C), λ1, λ2, ε)
+            c = sinkhorn_unbalanced2(cu(μ), cu(ν), cu(C), λ1, λ2, ε)
+
+            # compare with results on the CPU
+            @test γ ≈ cu(sinkhorn_unbalanced(μ, ν, C, λ1, λ2, ε))
+            @test c ≈ cu(sinkhorn_unbalanced2(μ, ν, C, λ1, λ2, ε))
+        end
+    end
+end