From f3fcf0f5e942be163af38fa34ecabcd97f1ba3b5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 20 Oct 2021 14:48:51 -0400 Subject: [PATCH 1/6] add flux based benchmark environment --- .gitignore | 2 +- benchmarks/.gitignore | 2 + benchmarks/Manifest.toml | 169 +++++++++++++++++++++++++++++++++++++++ benchmarks/Project.toml | 3 + benchmarks/run.jl | 65 +++++++++++++++ benchmarks/spack.yaml | 11 +++ 6 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 benchmarks/.gitignore create mode 100644 benchmarks/Manifest.toml create mode 100644 benchmarks/Project.toml create mode 100644 benchmarks/run.jl create mode 100644 benchmarks/spack.yaml diff --git a/.gitignore b/.gitignore index 5278359..aa46e26 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ res -Manifest.toml +/Manifest.toml Enzyme-GPU-Tests diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore new file mode 100644 index 0000000..3d5d678 --- /dev/null +++ b/benchmarks/.gitignore @@ -0,0 +1,2 @@ +.spack-env +spack.lock diff --git a/benchmarks/Manifest.toml b/benchmarks/Manifest.toml new file mode 100644 index 0000000..c3f2e3c --- /dev/null +++ b/benchmarks/Manifest.toml @@ -0,0 +1,169 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.7.0-beta3.0" +manifest_format = "2.0" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.CEnum]] +git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.1" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.Downloads]] +deps = ["ArgTools", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" + +[[deps.FluxRM]] +deps = ["CEnum", "JSON3", "Libdl", "StructTypes"] +git-tree-sha1 =
"2890941804efd695679e00c993157d0c32be9c6c" +repo-rev = "main" +repo-url = "https://github.com/flux-framework/FluxRM.jl" +uuid = "e7b9fac9-e441-4388-a71b-956020f2d1a2" +version = "0.1.0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.JSON3]] +deps = ["Dates", "Mmap", "Parsers", "StructTypes", "UUIDs"] +git-tree-sha1 = "7d58534ffb62cd947950b3aa9b993e63307a6125" +uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +version = "1.9.2" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" + +[[deps.PackageCompiler]] +deps = ["Artifacts", "LazyArtifacts", "Libdl", "Pkg", "RelocatableFolders", "UUIDs"] +git-tree-sha1 = "a965dd53ccaa69010d62851ab73d4e0c3d098314" +uuid = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" +version = "1.7.6" + +[[deps.Parsers]] +deps = ["Dates"] +git-tree-sha1 = "98f59ff3639b3d9485a03a72f3ab35bab9465720" +uuid = 
"69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.0.6" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.RelocatableFolders]] +deps = ["SHA", "Scratch"] +git-tree-sha1 = "df2be5142a2a3db2da37b21d87c9fa7973486bfd" +uuid = "05181044-ff0b-4ac5-8273-598c1e38db00" +version = "0.1.2" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[deps.Scratch]] +deps = ["Dates"] +git-tree-sha1 = "0b4b7f1393cff97c33891da2a0bf69c6ed241fda" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.1.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.StructTypes]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "d24a825a95a6d98c385001212dc9020d609f2d4f" +uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" +version = "1.8.1" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" diff --git a/benchmarks/Project.toml 
b/benchmarks/Project.toml new file mode 100644 index 0000000..f1c8b5c --- /dev/null +++ b/benchmarks/Project.toml @@ -0,0 +1,3 @@ +[deps] +FluxRM = "e7b9fac9-e441-4388-a71b-956020f2d1a2" +PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" diff --git a/benchmarks/run.jl b/benchmarks/run.jl new file mode 100644 index 0000000..eab3ac7 --- /dev/null +++ b/benchmarks/run.jl @@ -0,0 +1,65 @@ +using FluxRM + +function __get_treedict!(dict, path...) + next_dict = dict + for node in path + next_dict = get!(()-> Dict{String, Any}(), next_dict, node) + end + return next_dict +end + +function __set_io_path(spec::FluxRM.JobSpec.Jobspec, iotype, name, path) + system = spec.attributes.system + if system.shell === nothing + system.shell = Dict{String, Any}() + end + io = __get_treedict!(system.shell, "options", iotype, name) + + io["type"] = "file" + io["path"] = path +end + +function juliaspec(args, dir; num_nodes=1, num_tasks_per_node=6, cores_per_task=6) + num_tasks = num_nodes*num_tasks_per_node + cmd = `$(Base.julia_cmd()) $(args)` + jobspec = FluxRM.JobSpec.from_command(cmd; num_nodes, num_tasks, cores_per_task) + system = jobspec.attributes.system + system.cwd = dir + system.environment = Dict( + "JULIA_PROJECT" => dir, + "OPENBLAS_NUM_THREADS" => "8" # HyperThreads + ) + __set_io_path(jobspec, "output", "stderr", "flux-{{id}}.err") + __set_io_path(jobspec, "output", "stdout", "flux-{{id}}.out") + @assert FluxRM.JobSpec.validate(jobspec, 1) + + # FluxRM.JSON3.pretty(FluxRM.JSON3.write(jobspec)) + + jobspec +end + +# Notes +# - Flux treats hyper-threads as a single core + +function nodes() + rpc = fetch(FluxRM.RPC(Flux(), "resource.status", nodeid=0)) + R = first(rpc.R.execution.R_lite) + + hosts = FluxRM.IDSet(R.rank) + Int(length(hosts)) +end + +const N = nodes() + +let flux = Flux() + + for i in 0:floor(Int,log2(N)) + n = 2^i + for psize in (20,) + jobspec = juliaspec(`-L setup.jl experiment.jl $psize`, realpath("experiment"), num_nodes=n) + sub = 
FluxRM.submit(flux, jobspec) + job = FluxRM.Job(sub) + @info "Launched" jobid = FluxRM.encode(job) n psize + end + end +end diff --git a/benchmarks/spack.yaml b/benchmarks/spack.yaml new file mode 100644 index 0000000..eb294a7 --- /dev/null +++ b/benchmarks/spack.yaml @@ -0,0 +1,11 @@ +# This is a Spack Environment file. +# +# It describes a set of packages to be installed, along with +# configuration settings. +spack: + # add package specs to the `specs` list + specs: + - flux-sched + - flux-core + concretization: together + view: true From f42232f39432c6b51269c5842b38b82b63dd8997 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 25 Oct 2021 08:49:49 -0400 Subject: [PATCH 2/6] add readme --- benchmarks/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 benchmarks/README.md diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..f4a7310 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,15 @@ +# Running the benchmakrs + +## Installation +``` +spack env activate . +spack concretize +despacktivate +``` + +## Launching +``` +spack env activate . +flux start +julia --project=. 
run.jl +``` From cacfadae57f6969806aa23447844856dbbb19b19 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 25 Oct 2021 09:08:47 -0400 Subject: [PATCH 3/6] Wireup for MPI not yet working --- benchmarks/Manifest.toml | 2 +- benchmarks/run.jl | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/benchmarks/Manifest.toml b/benchmarks/Manifest.toml index c3f2e3c..722dea2 100644 --- a/benchmarks/Manifest.toml +++ b/benchmarks/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.7.0-beta3.0" +julia_version = "1.7.0-rc1" manifest_format = "2.0" [[deps.ArgTools]] diff --git a/benchmarks/run.jl b/benchmarks/run.jl index eab3ac7..b100b09 100644 --- a/benchmarks/run.jl +++ b/benchmarks/run.jl @@ -19,15 +19,16 @@ function __set_io_path(spec::FluxRM.JobSpec.Jobspec, iotype, name, path) io["path"] = path end -function juliaspec(args, dir; num_nodes=1, num_tasks_per_node=6, cores_per_task=6) +function juliaspec(args, dir; num_nodes=1, num_tasks_per_node=8, cores_per_task=1) num_tasks = num_nodes*num_tasks_per_node cmd = `$(Base.julia_cmd()) $(args)` jobspec = FluxRM.JobSpec.from_command(cmd; num_nodes, num_tasks, cores_per_task) system = jobspec.attributes.system system.cwd = dir system.environment = Dict( - "JULIA_PROJECT" => dir, - "OPENBLAS_NUM_THREADS" => "8" # HyperThreads + "JULIA_PROJECT" => dir, + "JULIA_NUM_THREADS" => string(cores_per_task), # environment variable values are strings + "JULIA_EXCLUSIVE" => "1" ) __set_io_path(jobspec, "output", "stderr", "flux-{{id}}.err") __set_io_path(jobspec, "output", "stdout", "flux-{{id}}.out") @@ -50,16 +51,16 @@ function nodes() end const N = nodes() +const workdir = realpath(joinpath(@__DIR__, "..", "examples")) -let flux = Flux() +@info "Launching Jobs in " workdir +let flux = Flux() for i in 0:floor(Int,log2(N)) n = 2^i - for psize in (20,) - jobspec = juliaspec(`-L setup.jl experiment.jl $psize`, realpath("experiment"), num_nodes=n) - sub = FluxRM.submit(flux,
jobspec) - job = FluxRM.Job(sub) - @info "Launched" jobid = FluxRM.encode(job) n psize - end + jobspec = juliaspec(`benchmark.jl -s 45`, workdir, num_nodes=n) + sub = FluxRM.submit(flux, jobspec) + job = FluxRM.Job(sub) + @info "Launched" jobid = FluxRM.encode(job) n end end From 02cd09e2f4221e7629cb3387970d8332405a02ef Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 25 Oct 2021 11:07:26 -0400 Subject: [PATCH 4/6] add missing --mpi --- benchmarks/README.md | 6 ++++++ benchmarks/run.jl | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index f4a7310..a9b6a2c 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,12 +1,18 @@ # Running the benchmakrs ## Installation + +In `benchmarks/`: + ``` spack env activate . spack concretize despacktivate +spack env activate . +julia --project=. -e "import Pkg; Pkg.instantiate()" ``` + ## Launching ``` spack env activate . diff --git a/benchmarks/run.jl b/benchmarks/run.jl index b100b09..c5f525e 100644 --- a/benchmarks/run.jl +++ b/benchmarks/run.jl @@ -58,7 +58,7 @@ const workdir = realpath(joinpath(@__DIR__, "..", "examples")) let flux = Flux() for i in 0:floor(Int,log2(N)) n = 2^i - jobspec = juliaspec(`benchmark.jl -s 45`, workdir, num_nodes=n) + jobspec = juliaspec(`benchmark.jl -s 45 --mpi`, workdir, num_nodes=n) sub = FluxRM.submit(flux, jobspec) job = FluxRM.Job(sub) @info "Launched" jobid = FluxRM.encode(job) n From 9bb540d8fa13977ea06244c38b71ff175d3bd072 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 9 Nov 2021 16:06:36 -0500 Subject: [PATCH 5/6] build env --- benchmarks/run.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/benchmarks/run.jl b/benchmarks/run.jl index c5f525e..e99ae27 100644 --- a/benchmarks/run.jl +++ b/benchmarks/run.jl @@ -53,6 +53,12 @@ end const N = nodes() const workdir = realpath(joinpath(@__DIR__, "..", "examples")) +@info "Instantiate" workdir +cd(workdir) do + run(`$(Base.julia_cmd())
--project=. -e 'import Pkg; Pkg.instantiate()'`) + run(`$(Base.julia_cmd()) --project=. -e 'import Pkg; Pkg.build()'`) +end + @info "Launching Jobs in " workdir let flux = Flux() From ca8a643e47c36c5d24612f2c953d0f2cc6d5c84c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 9 Nov 2021 16:06:51 -0500 Subject: [PATCH 6/6] work around quirks in my environment --- benchmarks/spack.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/spack.yaml b/benchmarks/spack.yaml index eb294a7..7da5fb7 100644 --- a/benchmarks/spack.yaml +++ b/benchmarks/spack.yaml @@ -7,5 +7,7 @@ spack: specs: - flux-sched - flux-core + - libzmq@4.3.2 + - hwloc ~rocm ~pci concretization: together view: true