diff --git a/benchmark/benchmark_gpu_innertype.jl b/benchmark/benchmark_gpu_innertype.jl new file mode 100644 index 000000000..b1e639a2c --- /dev/null +++ b/benchmark/benchmark_gpu_innertype.jl @@ -0,0 +1,81 @@ +using Revise +using QuantumClifford +using QuantumClifford: to_cpu, to_gpu +using CUDA +using BenchmarkTools +using Plots +using ProgressBars +using DataFrames +using CSV + +include("surface-code-circuit.jl") + +ints = [UInt8, UInt16, UInt32, UInt64, UInt128] +names = [:UInt8, :UInt16, :UInt32, :UInt64, :UInt128] + +function get_stats() + println("Starting tests with $(Threads.nthreads()) threads out of `Sys.CPU_THREADS = $(Sys.CPU_THREADS)`...") + + # Powers of 2 to benchmark + powers = reverse(3:18) # start from the hardest one first! (fail fast) + n_values = 2 .^ powers + + times = Dict([ + sm=>[] + for (sm, tp) in zip(names, ints) + ]) + + ccircuit = if eltype(circ) <: QuantumClifford.CompactifiedGate + circ + else + compactify_circuit(circ) + end + + CUDA.allowscalar(false) + + + for n in ProgressBar(n_values) + for (sm, T) in zip(names, ints) + gpu_frames() = to_gpu(QuantumClifford._create_pauliframe(ccircuit; trajectories=n), T) + gpu_column() = pftrajectories(fastcolumn(gpu_frames()), ccircuit), synchronize() + @show n, T + @elapsed gpu_column() # to eliminate compile time + push!(times[sm], @belapsed $gpu_column()) + end + # note. this is also measuring the time it takes to move data to gpu + end + df = DataFrame(times) + df.n_values=n_values + return df +end + +function load_stats() + if isfile("gpu_innertype_comparison.csv") + DataFrame(CSV.File("gpu_innertype_comparison.csv")) + else + get_stats() + end +end + +function save_stats(df) + CSV.write("gpu_innertype_comparison.csv", df) +end + +function plot_result(df) + my_y_axis(arrs) = begin + arr = [] + for a in arrs + append!(arr, a) + end + logarr = log.(arr) + exp.(range(min(logarr...), stop=max(logarr...), length=8)) + end + plot(df.n_values, [df[!, col] for col in names], marker=:circle, label=reshape(["times $col" for col in names], (1, length(names))), xlabel="n", ylabel="Execution Time (s)", title="gpu inner type comparison (fast column)", xticks=df.n_values[1:2:end], xscale=:log2, yticks=my_y_axis([df[!, col] for col in names]), yscale=:log10, legend=:topleft, size=(1200, 800)) +end + +function main() + df = load_stats() + save_stats(df) + plot_result(df) + savefig("benchmark_gpu_innertype.png") +end diff --git a/ext/QuantumCliffordGPUExt/adapters.jl b/ext/QuantumCliffordGPUExt/adapters.jl index 5b009728a..86d91685a 100644 --- a/ext/QuantumCliffordGPUExt/adapters.jl +++ b/ext/QuantumCliffordGPUExt/adapters.jl @@ -1,33 +1,40 @@ -# const InnerGPUType = UInt64; +const InnerGPUType = UInt32; +const InnerCPUType = UInt64; # todo. make the conversion types consiting with this... # also define a cpu default type?! -to_gpu(array::AbstractArray) = CuArray(array); +to_gpu(array::AbstractArray) = CuArray(array) to_cpu(array::AbstractArray) = Array(array); +# changes the type to InnerGPUType or InnerCPUType as well as copying to cpu/gpu +to_gpu(array::AbstractArray, tp::Type{T}) where {T <: Unsigned} = + CuArray(reinterpret(T, collect(array))) +to_cpu(array::AbstractArray, tp::Type{T}) where {T <: Unsigned} = + Array(reinterpret(T, collect(array))) + # maybe change the format of storing the data in gpu array # so that it is more convinient to work with them on gpu? # todo later add some type checking to avoid copying (or throw error) if the data is already on gpu/cpu -to_gpu(tab::QuantumClifford.Tableau) = - QuantumClifford.Tableau(to_gpu(tab.phases), tab.nqubits, to_gpu(tab.xzs)) +to_gpu(tab::QuantumClifford.Tableau, tp::Type{T}=InnerGPUType) where {T <: Unsigned} = + QuantumClifford.Tableau(to_gpu(tab.phases), tab.nqubits, to_gpu(tab.xzs, tp)) -to_cpu(tab::QuantumClifford.Tableau) = - QuantumClifford.Tableau(to_cpu(tab.phases), tab.nqubits, to_cpu(tab.xzs)) +to_cpu(tab::QuantumClifford.Tableau, tp::Type{T}=InnerCPUType) where {T <: Unsigned} = + QuantumClifford.Tableau(to_cpu(tab.phases), tab.nqubits, to_cpu(tab.xzs, tp)) -to_gpu(pauli::QuantumClifford.PauliOperator) = - QuantumClifford.PauliOperator(to_gpu(pauli.phase), pauli.nqubits, to_gpu(pauli.xz)) +to_gpu(pauli::QuantumClifford.PauliOperator, tp::Type{T}=InnerGPUType) where {T <: Unsigned} = + QuantumClifford.PauliOperator(to_gpu(pauli.phase), pauli.nqubits, to_gpu(pauli.xz, tp)) -to_cpu(pauli::QuantumClifford.PauliOperator) = - QuantumClifford.PauliOperator(to_cpu(pauli.phase), pauli.nqubits, to_cpu(pauli.xz)) +to_cpu(pauli::QuantumClifford.PauliOperator, tp::Type{T}=InnerCPUType) where {T <: Unsigned} = + QuantumClifford.PauliOperator(to_cpu(pauli.phase), pauli.nqubits, to_cpu(pauli.xz, tp)) -to_gpu(stabilizer::QuantumClifford.Stabilizer) = - Stabilizer(to_gpu(tab(stabilizer))) +to_gpu(stabilizer::QuantumClifford.Stabilizer, tp::Type{T}=InnerGPUType) where {T <: Unsigned} = + Stabilizer(to_gpu(tab(stabilizer), tp)) -to_cpu(stabilizer::QuantumClifford.Stabilizer) = - Stabilizer(to_cpu(tab(stabilizer))) +to_cpu(stabilizer::QuantumClifford.Stabilizer, tp::Type{T}=InnerCPUType) where {T <: Unsigned} = + Stabilizer(to_cpu(tab(stabilizer), tp)) -to_gpu(pauli_frame::QuantumClifford.PauliFrame) = - QuantumClifford.PauliFrame(to_gpu(pauli_frame.frame), to_gpu(pauli_frame.measurements)) +to_gpu(pauli_frame::QuantumClifford.PauliFrame, tp::Type{T}=InnerGPUType) where {T <: Unsigned} = + QuantumClifford.PauliFrame(to_gpu(pauli_frame.frame, tp), to_gpu(pauli_frame.measurements)) -to_cpu(pauli_frame::QuantumClifford.PauliFrame) = - QuantumClifford.PauliFrame(to_cpu(pauli_frame.frame), to_cpu(pauli_frame.measurements)) +to_cpu(pauli_frame::QuantumClifford.PauliFrame, tp::Type{T}=InnerCPUType) where {T <: Unsigned} = + QuantumClifford.PauliFrame(to_cpu(pauli_frame.frame, tp), to_cpu(pauli_frame.measurements)) diff --git a/ext/QuantumCliffordGPUExt/apply_noise.jl b/ext/QuantumCliffordGPUExt/apply_noise.jl index 57b4dfebf..ecfcd647e 100644 --- a/ext/QuantumCliffordGPUExt/apply_noise.jl +++ b/ext/QuantumCliffordGPUExt/apply_noise.jl @@ -27,6 +27,7 @@ function applynoise_kernel(xzs::DeviceMatrix{Tme}, return nothing end + r = rand() if r < p # X error xzs[ibig,f] ⊻= ismallm