From 5f3741e4044e814a58ad1aa0989eae431c73645c Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sat, 7 Oct 2023 10:23:31 +0100 Subject: [PATCH 1/6] Add init/read timing for C++ models --- src/main.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index d946d775..13059494 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -306,7 +306,9 @@ void run() #endif + auto init1 = std::chrono::high_resolution_clock::now(); stream->init_arrays(startA, startB, startC); + auto init2 = std::chrono::high_resolution_clock::now(); // Result of the Dot kernel, if used. T sum = 0.0; @@ -333,7 +335,54 @@ void run() std::vector<T> c(ARRAY_SIZE); + auto read1 = std::chrono::high_resolution_clock::now(); stream->read_arrays(a, b, c); + auto read2 = std::chrono::high_resolution_clock::now(); + + auto initElapsedS = std::chrono::duration_cast<std::chrono::duration<double>>(init2 - init1).count(); + auto readElapsedS = std::chrono::duration_cast<std::chrono::duration<double>>(read2 - read1).count(); + auto initBWps = ((mibibytes ? std::pow(2.0, -20.0) : 1.0E-6) * (3 * sizeof(T) * ARRAY_SIZE)) / initElapsedS; + auto readBWps = ((mibibytes ? std::pow(2.0, -20.0) : 1.0E-6) * (3 * sizeof(T) * ARRAY_SIZE)) / readElapsedS; + + if (output_as_csv) + { + std::cout + << "phase" << csv_separator + << "n_elements" << csv_separator + << "sizeof" << csv_separator + << ((mibibytes) ? "max_mibytes_per_sec" : "max_mbytes_per_sec") << csv_separator + << "runtime" << std::endl; + std::cout + << "Init" << csv_separator + << ARRAY_SIZE << csv_separator + << sizeof(T) << csv_separator + << initBWps << csv_separator + << initElapsedS << std::endl; + std::cout + << "Read" << csv_separator + << ARRAY_SIZE << csv_separator + << sizeof(T) << csv_separator + << readBWps << csv_separator + << readElapsedS << std::endl; + } + else + { + std::cout << "Init: " + << std::setw(7) + << initElapsedS + << " s (=" + << initBWps + << (mibibytes ? 
" MiBytes/sec" : " MBytes/sec") + << ")" << std::endl; + std::cout << "Read: " + << std::setw(7) + << readElapsedS + << " s (=" + << readBWps + << (mibibytes ? " MiBytes/sec" : " MBytes/sec") + << ")" << std::endl; + } + check_solution(num_times, a, b, c, sum); // Display timing results From 512a6fac0c43ca964d203a5f1cd0809a21219518 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sat, 7 Oct 2023 11:16:46 +0100 Subject: [PATCH 2/6] Add init/read timing for Rust --- src/rust/rust-stream/rustfmt.toml | 2 +- src/rust/rust-stream/src/lib.rs | 47 +++++++++++++++++-- src/rust/rust-stream/src/stream.rs | 12 +++++ .../rust-stream/tests/integration_test.rs | 8 ++-- 4 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/rust/rust-stream/rustfmt.toml b/src/rust/rust-stream/rustfmt.toml index aa2f0e9a..66b62356 100644 --- a/src/rust/rust-stream/rustfmt.toml +++ b/src/rust/rust-stream/rustfmt.toml @@ -54,7 +54,7 @@ use_field_init_shorthand = false force_explicit_abi = true condense_wildcard_suffixes = false color = "Auto" -required_version = "1.4.38" +required_version = "1.6.0" unstable_features = false disable_all_formatting = false skip_children = false diff --git a/src/rust/rust-stream/src/lib.rs b/src/rust/rust-stream/src/lib.rs index 3ac72c31..41ac0c28 100644 --- a/src/rust/rust-stream/src/lib.rs +++ b/src/rust/rust-stream/src/lib.rs @@ -174,7 +174,7 @@ where StreamData: RustStream { ); } - stream.init_arrays(); + let init = stream.run_init_arrays(); let tabulate = |xs: &Vec, name: &str, t_size: usize| -> Vec<(&str, String)> { let tail = &xs[1..]; // tail only @@ -235,10 +235,47 @@ where StreamData: RustStream { }; }; + let show_setup = |init: Duration, read: Duration| { + let setup = vec![ + ("Init", init.as_secs_f64(), 3 * array_bytes), + ("Read", read.as_secs_f64(), 3 * array_bytes), + ]; + if option.csv { + tabulate_all( + setup + .iter() + .map(|(name, elapsed, t_size)| { + vec![ + ("phase", name.to_string()), + ("n_elements", option.arraysize.to_string()), + 
("sizeof", t_size.to_string()), + ( + if option.mibibytes { "max_mibytes_per_sec" } else { "max_mbytes_per_sec" }, + (mega_scale * (*t_size as f64) / elapsed).to_string(), + ), + ("runtime", elapsed.to_string()), + ] + }) + .collect::>(), + ); + } else { + for (name, elapsed, t_size) in setup { + println!( + "{}: {:.5} s (={:.5} {})", + name, + elapsed, + mega_scale * (t_size as f64) / elapsed, + if option.mibibytes { "MiBytes/sec" } else { "MBytes/sec" } + ); + } + } + }; + let solutions_correct = match benchmark { Benchmark::All => { let (results, sum) = stream.run_all(option.numtimes); - stream.read_arrays(); + let read = stream.run_read_arrays(); + show_setup(init, read); let correct = check_solution(benchmark, option.numtimes, &stream, Some(sum)); tabulate_all(vec![ tabulate(&results.copy, "Copy", 2 * array_bytes), @@ -251,14 +288,16 @@ where StreamData: RustStream { } Benchmark::NStream => { let results = stream.run_nstream(option.numtimes); - stream.read_arrays(); + let read = stream.run_read_arrays(); + show_setup(init, read); let correct = check_solution(benchmark, option.numtimes, &stream, None); tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]); correct } Benchmark::Triad => { let results = stream.run_triad(option.numtimes); - stream.read_arrays(); + let read = stream.run_read_arrays(); + show_setup(init, read); let correct = check_solution(benchmark, option.numtimes, &stream, None); let total_bytes = 3 * array_bytes * option.numtimes; let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64()); diff --git a/src/rust/rust-stream/src/stream.rs b/src/rust/rust-stream/src/stream.rs index 560c6f1e..86de56b2 100644 --- a/src/rust/rust-stream/src/stream.rs +++ b/src/rust/rust-stream/src/stream.rs @@ -132,6 +132,18 @@ pub trait RustStream { fn nstream(&mut self); fn dot(&mut self) -> T; + fn run_init_arrays(&mut self) -> Duration { + timed(|| { + self.init_arrays(); + }) + } + + fn run_read_arrays(&mut self) -> Duration { + 
timed(|| { + self.read_arrays(); + }) + } + fn run_all(&mut self, n: usize) -> (AllTiming>, T) { let mut timings: AllTiming> = AllTiming { copy: vec![Duration::default(); n], diff --git a/src/rust/rust-stream/tests/integration_test.rs b/src/rust/rust-stream/tests/integration_test.rs index 8031a794..01705465 100644 --- a/src/rust/rust-stream/tests/integration_test.rs +++ b/src/rust/rust-stream/tests/integration_test.rs @@ -2,10 +2,10 @@ use rstest::rstest; #[rstest] fn test_main( - #[values(0, 1, 2, 3, 4)] device: usize, // - #[values("", "--pin")] pin: &str, // - #[values("", "--malloc")] malloc: &str, // - #[values("", "--init")] init: &str, // + #[values(0, 1, 2, 3, 4)] device: usize, // + #[values("", "--pin")] pin: &str, // + #[values("", "--malloc")] malloc: &str, // + #[values("", "--init")] init: &str, // #[values("", "--triad-only", "--nstream-only")] option: &str, // ) { let line = format!( From 971d1e8ac72b6d4fb76874d04a6a4873983f41e1 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sat, 7 Oct 2023 12:10:08 +0100 Subject: [PATCH 3/6] Add init/read timing for Scala --- src/scala/scala-stream/.bsp/sbt.json | 1 - src/scala/scala-stream/.gitignore | 1 + src/scala/scala-stream/.scalafmt.conf | 2 +- src/scala/scala-stream/build.sbt | 13 ++-- .../scala-stream/project/build.properties | 2 +- src/scala/scala-stream/project/plugins.sbt | 6 +- .../main/scala/scalastream/ScalaStream.scala | 63 ++++++++++++++----- 7 files changed, 63 insertions(+), 25 deletions(-) delete mode 100644 src/scala/scala-stream/.bsp/sbt.json diff --git a/src/scala/scala-stream/.bsp/sbt.json b/src/scala/scala-stream/.bsp/sbt.json deleted file mode 100644 index 2e1edb1e..00000000 --- a/src/scala/scala-stream/.bsp/sbt.json +++ /dev/null @@ -1 +0,0 @@ 
-{"name":"sbt","version":"1.5.2","bspVersion":"2.0.0-M5","languages":["scala"],"argv":["/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-2.fc33.x86_64/bin/java","-Xms100m","-Xmx100m","-classpath","/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar","xsbt.boot.Boot","-bsp","--sbt-launch-jar=/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar"]} \ No newline at end of file diff --git a/src/scala/scala-stream/.gitignore b/src/scala/scala-stream/.gitignore index 2f7896d1..ee5cda21 100644 --- a/src/scala/scala-stream/.gitignore +++ b/src/scala/scala-stream/.gitignore @@ -1 +1,2 @@ target/ +.bsp/ diff --git a/src/scala/scala-stream/.scalafmt.conf b/src/scala/scala-stream/.scalafmt.conf index 8c7d0c8e..5d87df36 100644 --- a/src/scala/scala-stream/.scalafmt.conf +++ b/src/scala/scala-stream/.scalafmt.conf @@ -1,4 +1,4 @@ -version = "3.0.0-RC2" +version = "3.7.14" runner.dialect = scala3 style = defaultWithAlign diff --git a/src/scala/scala-stream/build.sbt b/src/scala/scala-stream/build.sbt index 49164f63..b13fda3e 100644 --- a/src/scala/scala-stream/build.sbt +++ b/src/scala/scala-stream/build.sbt @@ -3,7 +3,7 @@ lazy val mainCls = Some("scalastream.App") lazy val root = (project in file(".")) .enablePlugins(NativeImagePlugin) .settings( - scalaVersion := "3.0.0", + scalaVersion := "3.3.1", version := "4.0", organization := "uk.ac.bristol.uob-hpc", organizationName := "University of Bristol", @@ -11,6 +11,11 @@ lazy val root = (project in file(".")) assembly / mainClass := mainCls, scalacOptions ~= filterConsoleScalacOptions, assembly / assemblyJarName := "scala-stream.jar", + assembly / assemblyMergeStrategy := { + case PathList("module-info.class") => MergeStrategy.discard + case PathList("META-INF", "versions", xs @ _, "module-info.class") => MergeStrategy.discard + case x => (ThisBuild / assemblyMergeStrategy).value(x) + }, nativeImageOptions := Seq( 
"--no-fallback", "-H:ReflectionConfigurationFiles=../../reflect-config.json" @@ -22,8 +27,8 @@ lazy val root = (project in file(".")) // Lazy val implementation in Scala 3 triggers an exception in nativeImage, use 2_13 for arg parsing for now otherwise we can't get to the benchmarking part ("com.github.scopt" %% "scopt" % "4.0.1").cross(CrossVersion.for3Use2_13), // par also uses lazy val at some point, so it doesn't work in nativeImage - "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.3", - "net.openhft" % "affinity" % "3.21ea1", - "org.slf4j" % "slf4j-simple" % "1.7.30" // for affinity + "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4", + "net.openhft" % "affinity" % "3.23.2", + "org.slf4j" % "slf4j-simple" % "2.0.5" // for affinity ) ) diff --git a/src/scala/scala-stream/project/build.properties b/src/scala/scala-stream/project/build.properties index 19479ba4..875b706a 100644 --- a/src/scala/scala-stream/project/build.properties +++ b/src/scala/scala-stream/project/build.properties @@ -1 +1 @@ -sbt.version=1.5.2 +sbt.version=1.9.2 diff --git a/src/scala/scala-stream/project/plugins.sbt b/src/scala/scala-stream/project/plugins.sbt index 2c82902e..35a00f05 100644 --- a/src/scala/scala-stream/project/plugins.sbt +++ b/src/scala/scala-stream/project/plugins.sbt @@ -1,6 +1,6 @@ addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.5.3") -addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.17") +addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.20") addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.0") -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.3") addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.27") -addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2") +addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3") diff --git a/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala 
b/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala index 9c011a6d..888ba7c0 100644 --- a/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala +++ b/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala @@ -14,6 +14,7 @@ transparent trait ScalaStream[@specialized(Float, Double) A]: def config: Config[A] def initArrays(): Unit + def readArrays(): Unit = () def copy(): Unit def mul(): Unit def add(): Unit @@ -27,6 +28,8 @@ transparent trait ScalaStream[@specialized(Float, Double) A]: val end = System.nanoTime() FiniteDuration(end - start, TimeUnit.NANOSECONDS) -> r + inline def runInitArrays(): FiniteDuration = timed(initArrays())._1 + inline def runReadArrays(): FiniteDuration = timed(readArrays())._1 inline def runAll(times: Int)(using Fractional[A]): (Timings[Vector[FiniteDuration]], A) = val copy = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero) val mul = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero) @@ -62,7 +65,6 @@ transparent trait ScalaStream[@specialized(Float, Double) A]: def data(): Data[A] - trait Fractional[@specialized(Double, Float) A]: def toFractional(f: Float): A def toFractional(f: Double): A @@ -77,13 +79,13 @@ trait Fractional[@specialized(Double, Float) A]: extension (x: Int) inline def fractional = toFractional(x.toFloat) extension (x: Long) inline def fractional = toFractional(x.toDouble) extension (x: A) - inline def +(y: A) = add(x, y) - inline def -(y: A) = sub(x, y) - inline def *(y: A) = mul(x, y) - inline def /(y: A) = div(x, y) - inline def >(y: A) = compare(x, y) > 0 - inline def <(y: A) = compare(x, y) < 0 - inline def abs_ = abs(x) + inline def +(y: A) = add(x, y) + inline def -(y: A) = sub(x, y) + inline def *(y: A) = mul(x, y) + inline def /(y: A) = div(x, y) + inline def >(y: A) = compare(x, y) > 0 + inline def <(y: A) = compare(x, y) < 0 + inline def abs_ = abs(x) end Fractional given FloatFractional: Fractional[Float] with @@ -204,7 +206,7 @@ object App: 
validateXs("c", vec.c, goldC) dotSum.foreach { sum => - val goldSum = (goldA * goldB) * (config.options.arraysize).fractional + val goldSum = (goldA * goldB) * config.options.arraysize.fractional val error = ((sum - goldSum) / goldSum).abs_ if error > 1.fractional / 100000000.fractional then Console.err.println( @@ -238,10 +240,10 @@ object App: ) println(s"Running ${config.benchmark match { - case Benchmark.All => "kernels" - case Benchmark.Triad => "triad" - case Benchmark.NStream => "nstream" - }} ${opt.numtimes} times") + case Benchmark.All => "kernels" + case Benchmark.Triad => "triad" + case Benchmark.NStream => "nstream" + }} ${opt.numtimes} times") if config.benchmark == Benchmark.Triad then println(s"Number of elements: ${opt.arraysize}") @@ -288,11 +290,38 @@ object App: println(header.map(_._1.padTo(padding, ' ')).mkString(sep)) println(rows.map(_.map(_._2.padTo(padding, ' ')).mkString(sep)).mkString("\n")) + def showInit(init: FiniteDuration, read: FiniteDuration): Unit = { + val setup = + Vector(("Init", init.seconds, 3 * arrayBytes), ("Read", read.seconds, 3 * arrayBytes)) + if opt.csv then + tabulate( + setup.map((name, elapsed, totalBytes) => + Vector( + "phase" -> name, + "n_elements" -> opt.arraysize.toString, + "sizeof" -> arrayBytes.toString, + s"max_m${if opt.mibibytes then "i" else ""}bytes_per_sec" -> + (megaScale * totalBytes.toDouble / elapsed).toString, + "runtime" -> elapsed.toString + ) + ): _* + ) + else + for (name, elapsed, totalBytes) <- setup do + println( + f"$name: $elapsed%.5f s (=${megaScale * totalBytes.toDouble / elapsed}%.5f M${ + if opt.mibibytes then "i" else "" + }Bytes/sec)" + ) + } + val stream = mkStream(config) - stream.initArrays() + val init = stream.runInitArrays() config.benchmark match case Benchmark.All => val (results, sum) = stream.runAll(opt.numtimes) + val read = stream.runReadArrays() + showInit(init, read) validate(stream.data(), config, Some(sum)) tabulate( mkRow(results.copy, "Copy", 2 * arrayBytes), @@ 
-303,10 +332,14 @@ object App: ) case Benchmark.NStream => val result = stream.runNStream(opt.numtimes) + val read = stream.runReadArrays() + showInit(init, read) validate(stream.data(), config) tabulate(mkRow(result, "Nstream", 4 * arrayBytes)) case Benchmark.Triad => - val results = stream.runTriad(opt.numtimes) + val results = stream.runTriad(opt.numtimes) + val read = stream.runReadArrays() + showInit(init, read) val totalBytes = 3 * arrayBytes * opt.numtimes val bandwidth = megaScale * (totalBytes / results.seconds) println(f"Runtime (seconds): ${results.seconds}%.5f") From 3de019c156a803a6b5abd1b5865828a80965087c Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sat, 7 Oct 2023 13:50:58 +0100 Subject: [PATCH 4/6] Add init/read timing for Java Upgrade to TornadoVM 0.15 API --- src/java/java-stream/pom.xml | 8 +- .../src/main/java/javastream/JavaStream.java | 14 ++- .../src/main/java/javastream/Main.java | 93 ++++++++++++++----- .../javastream/aparapi/AparapiStreams.java | 2 +- .../javastream/jdk/GenericPlainStream.java | 2 +- .../java/javastream/jdk/GenericStream.java | 2 +- .../jdk/SpecialisedDoubleStream.java | 2 +- .../jdk/SpecialisedFloatStream.java | 2 +- .../jdk/SpecialisedPlainDoubleStream.java | 2 +- .../jdk/SpecialisedPlainFloatStream.java | 2 +- .../tornadovm/GenericTornadoVMStream.java | 34 +++---- .../tornadovm/SpecialisedDouble.java | 52 +++++++++-- .../tornadovm/SpecialisedFloat.java | 52 +++++++++-- .../tornadovm/TornadoVMStreams.java | 26 ++++-- 14 files changed, 210 insertions(+), 83 deletions(-) diff --git a/src/java/java-stream/pom.xml b/src/java/java-stream/pom.xml index d28a3d5f..78d26b31 100644 --- a/src/java/java-stream/pom.xml +++ b/src/java/java-stream/pom.xml @@ -12,7 +12,7 @@ UTF-8 UTF-8 - 5.7.2 + 5.9.2 @@ -27,19 +27,19 @@ com.beust jcommander - 1.81 + 1.82 tornado tornado-api - 0.9 + 0.15.1 com.aparapi aparapi - 2.0.0 + 3.0.0 diff --git a/src/java/java-stream/src/main/java/javastream/JavaStream.java 
b/src/java/java-stream/src/main/java/javastream/JavaStream.java index 7ab96cb5..4fdb229b 100644 --- a/src/java/java-stream/src/main/java/javastream/JavaStream.java +++ b/src/java/java-stream/src/main/java/javastream/JavaStream.java @@ -56,7 +56,7 @@ protected JavaStream(Config config) { protected abstract T dot(); - protected abstract Data data(); + protected abstract Data readArrays(); public static class EnumeratedStream extends JavaStream { @@ -113,8 +113,8 @@ public T dot() { } @Override - public Data data() { - return actual.data(); + public Data readArrays() { + return actual.readArrays(); } } @@ -140,6 +140,14 @@ private static Duration timed(Runnable f) { return Duration.ofNanos(end - start); } + final Duration runInitArrays() { + return timed(this::initArrays); + } + + final SimpleImmutableEntry> runReadArrays() { + return timed(this::readArrays); + } + final SimpleImmutableEntry, T> runAll(int times) { Timings timings = new Timings<>(); T lastSum = null; diff --git a/src/java/java-stream/src/main/java/javastream/Main.java b/src/java/java-stream/src/main/java/javastream/Main.java index 24421281..3732a242 100644 --- a/src/java/java-stream/src/main/java/javastream/Main.java +++ b/src/java/java-stream/src/main/java/javastream/Main.java @@ -128,6 +128,40 @@ static final class Implementation { } } + @SuppressWarnings("unchecked") + static void showInit( + int totalBytes, double megaScale, Options opt, Duration init, Duration read) { + List> setup = + Arrays.asList( + new SimpleImmutableEntry<>("Init", durationToSeconds(init)), + new SimpleImmutableEntry<>("Read", durationToSeconds(read))); + if (opt.csv) { + tabulateCsv( + true, + setup.stream() + .map( + x -> + Arrays.asList( + new SimpleImmutableEntry<>("function", x.getKey()), + new SimpleImmutableEntry<>("n_elements", opt.arraysize + ""), + new SimpleImmutableEntry<>("sizeof", totalBytes + ""), + new SimpleImmutableEntry<>( + "max_m" + (opt.mibibytes ? 
"i" : "") + "bytes_per_sec", + ((megaScale * (double) totalBytes / x.getValue())) + ""), + new SimpleImmutableEntry<>("runtime", x.getValue() + ""))) + .toArray(List[]::new)); + } else { + for (Entry e : setup) { + System.out.printf( + "%s: %.5f s (%.5f M%sBytes/sec)%n", + e.getKey(), + e.getValue(), + megaScale * (double) totalBytes / e.getValue(), + opt.mibibytes ? "i" : ""); + } + } + } + static boolean run( String name, Config config, Function, JavaStream> mkStream) { @@ -183,35 +217,46 @@ static boolean run( JavaStream stream = mkStream.apply(config); - stream.initArrays(); - + Duration init = stream.runInitArrays(); final boolean ok; switch (config.benchmark) { case ALL: - Entry, T> results = stream.runAll(opt.numtimes); - ok = checkSolutions(stream.data(), config, Optional.of(results.getValue())); - Timings timings = results.getKey(); - tabulateCsv( - opt.csv, - mkCsvRow(timings.copy, "Copy", 2 * arrayBytes, megaScale, opt), - mkCsvRow(timings.mul, "Mul", 2 * arrayBytes, megaScale, opt), - mkCsvRow(timings.add, "Add", 3 * arrayBytes, megaScale, opt), - mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt), - mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt)); - break; + { + Entry, T> results = stream.runAll(opt.numtimes); + SimpleImmutableEntry> read = stream.runReadArrays(); + showInit(totalBytes, megaScale, opt, init, read.getKey()); + ok = checkSolutions(read.getValue(), config, Optional.of(results.getValue())); + Timings timings = results.getKey(); + tabulateCsv( + opt.csv, + mkCsvRow(timings.copy, "Copy", 2 * arrayBytes, megaScale, opt), + mkCsvRow(timings.mul, "Mul", 2 * arrayBytes, megaScale, opt), + mkCsvRow(timings.add, "Add", 3 * arrayBytes, megaScale, opt), + mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt), + mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt)); + break; + } case NSTREAM: - List nstreamResults = stream.runNStream(opt.numtimes); - ok = checkSolutions(stream.data(), config, 
Optional.empty()); - tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt)); - break; + { + List nstreamResults = stream.runNStream(opt.numtimes); + SimpleImmutableEntry> read = stream.runReadArrays(); + showInit(totalBytes, megaScale, opt, init, read.getKey()); + ok = checkSolutions(read.getValue(), config, Optional.empty()); + tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt)); + break; + } case TRIAD: - Duration triadResult = stream.runTriad(opt.numtimes); - ok = checkSolutions(stream.data(), config, Optional.empty()); - int triadTotalBytes = 3 * arrayBytes * opt.numtimes; - double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult)); - System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult)); - System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth); - break; + { + Duration triadResult = stream.runTriad(opt.numtimes); + SimpleImmutableEntry> read = stream.runReadArrays(); + showInit(totalBytes, megaScale, opt, init, read.getKey()); + ok = checkSolutions(read.getValue(), config, Optional.empty()); + int triadTotalBytes = 3 * arrayBytes * opt.numtimes; + double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult)); + System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult)); + System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth); + break; + } default: throw new AssertionError(); } diff --git a/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java b/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java index ab2de528..052c807d 100644 --- a/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java +++ b/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java @@ -122,7 +122,7 @@ public T dot() { } @Override - public Data data() { + public Data readArrays() { return kernels.syncAndDispose(); } } diff --git 
a/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java b/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java index 7f210fa8..8075603c 100644 --- a/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java +++ b/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java @@ -86,7 +86,7 @@ public T dot() { } @Override - public Data data() { + public Data readArrays() { return new Data<>(a, b, c); } } diff --git a/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java b/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java index 1e65b8f9..3cacf3ac 100644 --- a/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java +++ b/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java @@ -80,7 +80,7 @@ public T dot() { } @Override - public Data data() { + public Data readArrays() { return new Data<>(a, b, c); } } diff --git a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java index 26406a62..1b54bc3a 100644 --- a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java +++ b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java @@ -78,7 +78,7 @@ public Double dot() { } @Override - public Data data() { + public Data readArrays() { return new Data<>(boxed(a), boxed(b), boxed(c)); } } diff --git a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java index 6c414c16..4d8c137a 100644 --- a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java +++ b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java @@ -78,7 +78,7 @@ public Float dot() { } @Override - public Data data() { + public Data readArrays() { return new Data<>(boxed(a), boxed(b), boxed(c)); } } 
diff --git a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java index afda2ef8..c4f38d0e 100644 --- a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java +++ b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java @@ -78,7 +78,7 @@ public Double dot() { } @Override - public Data data() { + public Data readArrays() { return new Data<>(boxed(a), boxed(b), boxed(c)); } } diff --git a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java index 9ccee53e..5178ed27 100644 --- a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java +++ b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java @@ -78,7 +78,7 @@ public Float dot() { } @Override - public Data data() { + public Data readArrays() { return new Data<>(boxed(a), boxed(b), boxed(c)); } } diff --git a/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java b/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java index d936df60..a65c32ab 100644 --- a/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java +++ b/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java @@ -4,8 +4,8 @@ import java.util.stream.Collectors; import javastream.JavaStream; import javastream.Main.Config; -import uk.ac.manchester.tornado.api.TaskSchedule; -import uk.ac.manchester.tornado.api.TornadoRuntimeCI; +import uk.ac.manchester.tornado.api.TornadoExecutionPlan; +import uk.ac.manchester.tornado.api.TornadoRuntimeInterface; import uk.ac.manchester.tornado.api.common.TornadoDevice; import uk.ac.manchester.tornado.api.runtime.TornadoRuntime; @@ -13,18 +13,18 @@ abstract class 
GenericTornadoVMStream extends JavaStream { protected final TornadoDevice device; - protected TaskSchedule copyTask; - protected TaskSchedule mulTask; - protected TaskSchedule addTask; - protected TaskSchedule triadTask; - protected TaskSchedule nstreamTask; - protected TaskSchedule dotTask; + protected TornadoExecutionPlan copyTask; + protected TornadoExecutionPlan mulTask; + protected TornadoExecutionPlan addTask; + protected TornadoExecutionPlan triadTask; + protected TornadoExecutionPlan nstreamTask; + protected TornadoExecutionPlan dotTask; GenericTornadoVMStream(Config config) { super(config); try { - TornadoRuntimeCI runtime = TornadoRuntime.getTornadoRuntime(); + TornadoRuntimeInterface runtime = TornadoRuntime.getTornadoRuntime(); List devices = TornadoVMStreams.enumerateDevices(runtime); device = devices.get(config.options.device); @@ -42,10 +42,6 @@ abstract class GenericTornadoVMStream extends JavaStream { } } - protected static TaskSchedule mkSchedule() { - return new TaskSchedule(""); - } - @Override public List listDevices() { return TornadoVMStreams.enumerateDevices(TornadoRuntime.getTornadoRuntime()).stream() @@ -55,12 +51,12 @@ public List listDevices() { @Override public void initArrays() { - this.copyTask.warmup(); - this.mulTask.warmup(); - this.addTask.warmup(); - this.triadTask.warmup(); - this.nstreamTask.warmup(); - this.dotTask.warmup(); + this.copyTask.withWarmUp(); + this.mulTask.withWarmUp(); + this.addTask.withWarmUp(); + this.triadTask.withWarmUp(); + this.nstreamTask.withWarmUp(); + this.dotTask.withWarmUp(); } @Override diff --git a/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java index 7712e317..c10153e3 100644 --- a/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java +++ b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java @@ -2,8 +2,11 @@ import java.util.Arrays; 
import javastream.Main.Config; +import uk.ac.manchester.tornado.api.TaskGraph; +import uk.ac.manchester.tornado.api.TornadoExecutionPlan; import uk.ac.manchester.tornado.api.annotations.Parallel; import uk.ac.manchester.tornado.api.annotations.Reduce; +import uk.ac.manchester.tornado.api.enums.DataTransferMode; final class SpecialisedDouble extends GenericTornadoVMStream { @@ -49,7 +52,7 @@ private static void dot_( private final double[] a, b, c; private final double[] dotSum; - @SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"}) + @SuppressWarnings({"DuplicatedCode"}) SpecialisedDouble(Config config) { super(config); final int size = config.options.arraysize; @@ -58,12 +61,43 @@ private static void dot_( b = new double[size]; c = new double[size]; dotSum = new double[1]; - this.copyTask = mkSchedule().task("", SpecialisedDouble::copy, size, a, c); - this.mulTask = mkSchedule().task("", SpecialisedDouble::mul, size, b, c, scalar); - this.addTask = mkSchedule().task("", SpecialisedDouble::add, size, a, b, c); - this.triadTask = mkSchedule().task("", SpecialisedDouble::triad, size, a, b, c, scalar); - this.nstreamTask = mkSchedule().task("", SpecialisedDouble::nstream, size, a, b, c, scalar); - this.dotTask = mkSchedule().task("", SpecialisedDouble::dot_, a, b, dotSum).streamOut(dotSum); + this.copyTask = + new TornadoExecutionPlan( + new TaskGraph("copy") + .task("copy", SpecialisedDouble::copy, size, a, c) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c) + .snapshot()); + this.mulTask = + new TornadoExecutionPlan( + new TaskGraph("mul") + .task("mul", SpecialisedDouble::mul, size, b, c, scalar) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, b, c) + .snapshot()); + this.addTask = + new TornadoExecutionPlan( + new TaskGraph("add") + .task("add", SpecialisedDouble::add, size, a, b, c) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c) + .snapshot()); + this.triadTask = + new TornadoExecutionPlan( + new 
TaskGraph("triad") + .task("triad", SpecialisedDouble::triad, size, a, b, c, scalar) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c) + .snapshot()); + this.nstreamTask = + new TornadoExecutionPlan( + new TaskGraph("nstream") + .task("nstream", SpecialisedDouble::nstream, size, a, b, c, scalar) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c) + .snapshot()); + this.dotTask = + new TornadoExecutionPlan( + new TaskGraph("dot") + .task("dot", SpecialisedDouble::dot_, a, b, dotSum) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b) + .transferToHost(DataTransferMode.EVERY_EXECUTION, new Object[] {dotSum}) + .snapshot()); } @Override @@ -72,7 +106,7 @@ public void initArrays() { Arrays.fill(a, config.initA); Arrays.fill(b, config.initB); Arrays.fill(c, config.initC); - TornadoVMStreams.xferToDevice(device, a, b, c); + TornadoVMStreams.allocAndXferToDevice(device, a, b, c); } @Override @@ -81,7 +115,7 @@ protected Double getSum() { } @Override - public Data data() { + public Data readArrays() { TornadoVMStreams.xferFromDevice(device, a, b, c); return new Data<>(boxed(a), boxed(b), boxed(c)); } diff --git a/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java index e61cfe9e..0f3fffa7 100644 --- a/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java +++ b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java @@ -2,8 +2,11 @@ import java.util.Arrays; import javastream.Main.Config; +import uk.ac.manchester.tornado.api.TaskGraph; +import uk.ac.manchester.tornado.api.TornadoExecutionPlan; import uk.ac.manchester.tornado.api.annotations.Parallel; import uk.ac.manchester.tornado.api.annotations.Reduce; +import uk.ac.manchester.tornado.api.enums.DataTransferMode; final class SpecialisedFloat extends GenericTornadoVMStream { @@ -49,7 +52,7 @@ private static void dot_( private final float[] a, 
b, c; private final float[] dotSum; - @SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"}) + @SuppressWarnings({"DuplicatedCode"}) SpecialisedFloat(Config config) { super(config); final int size = config.options.arraysize; @@ -58,12 +61,43 @@ private static void dot_( b = new float[size]; c = new float[size]; dotSum = new float[1]; - this.copyTask = mkSchedule().task("", SpecialisedFloat::copy, size, a, c); - this.mulTask = mkSchedule().task("", SpecialisedFloat::mul, size, b, c, scalar); - this.addTask = mkSchedule().task("", SpecialisedFloat::add, size, a, b, c); - this.triadTask = mkSchedule().task("", SpecialisedFloat::triad, size, a, b, c, scalar); - this.nstreamTask = mkSchedule().task("", SpecialisedFloat::nstream, size, a, b, c, scalar); - this.dotTask = mkSchedule().task("", SpecialisedFloat::dot_, a, b, dotSum).streamOut(dotSum); + this.copyTask = + new TornadoExecutionPlan( + new TaskGraph("copy") + .task("copy", SpecialisedFloat::copy, size, a, c) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c) + .snapshot()); + this.mulTask = + new TornadoExecutionPlan( + new TaskGraph("mul") + .task("mul", SpecialisedFloat::mul, size, b, c, scalar) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, b, c) + .snapshot()); + this.addTask = + new TornadoExecutionPlan( + new TaskGraph("add") + .task("add", SpecialisedFloat::add, size, a, b, c) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c) + .snapshot()); + this.triadTask = + new TornadoExecutionPlan( + new TaskGraph("triad") + .task("triad", SpecialisedFloat::triad, size, a, b, c, scalar) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c) + .snapshot()); + this.nstreamTask = + new TornadoExecutionPlan( + new TaskGraph("nstream") + .task("nstream", SpecialisedFloat::nstream, size, a, b, c, scalar) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c) + .snapshot()); + this.dotTask = + new TornadoExecutionPlan( + new TaskGraph("dot") + 
.task("dot", SpecialisedFloat::dot_, a, b, dotSum) + .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b) + .transferToHost(DataTransferMode.EVERY_EXECUTION, new Object[] {dotSum}) + .snapshot()); } @Override @@ -72,7 +106,7 @@ public void initArrays() { Arrays.fill(a, config.initA); Arrays.fill(b, config.initB); Arrays.fill(c, config.initC); - TornadoVMStreams.xferToDevice(device, a, b, c); + TornadoVMStreams.allocAndXferToDevice(device, a, b, c); } @Override @@ -81,7 +115,7 @@ protected Float getSum() { } @Override - public Data data() { + public Data readArrays() { TornadoVMStreams.xferFromDevice(device, a, b, c); return new Data<>(boxed(a), boxed(b), boxed(c)); } diff --git a/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java b/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java index 68eecadc..a43c7c8d 100644 --- a/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java +++ b/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java @@ -1,36 +1,46 @@ package javastream.tornadovm; +import java.util.Arrays; import java.util.List; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; import javastream.JavaStream; import javastream.Main.Config; -import uk.ac.manchester.tornado.api.TornadoRuntimeCI; +import uk.ac.manchester.tornado.api.TornadoRuntimeInterface; +import uk.ac.manchester.tornado.api.common.Event; import uk.ac.manchester.tornado.api.common.TornadoDevice; -import uk.ac.manchester.tornado.api.mm.TornadoGlobalObjectState; +import uk.ac.manchester.tornado.api.memory.TornadoDeviceObjectState; +import uk.ac.manchester.tornado.api.memory.TornadoGlobalObjectState; import uk.ac.manchester.tornado.api.runtime.TornadoRuntime; public final class TornadoVMStreams { private TornadoVMStreams() {} - static void xferToDevice(TornadoDevice device, Object... 
xs) { + static void allocAndXferToDevice(TornadoDevice device, Object... xs) { for (Object x : xs) { TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x); + device.allocateObjects( + new Object[] {x}, 0, new TornadoDeviceObjectState[] {state.getDeviceState(device)}); List writeEvent = device.ensurePresent(x, state.getDeviceState(device), null, 0, 0); if (writeEvent != null) writeEvent.forEach(e -> device.resolveEvent(e).waitOn()); } } static void xferFromDevice(TornadoDevice device, Object... xs) { - for (Object x : xs) { - TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x); - device.resolveEvent(device.streamOut(x, 0, state.getDeviceState(device), null)).waitOn(); - } + Arrays.stream(xs) + .map( + x -> { + TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x); + return device.resolveEvent( + device.streamOut(x, 0, state.getDeviceState(device), null)); + }) + .collect(Collectors.toList()) + .forEach(Event::waitOn); } - static List enumerateDevices(TornadoRuntimeCI runtime) { + static List enumerateDevices(TornadoRuntimeInterface runtime) { return IntStream.range(0, runtime.getNumDrivers()) .mapToObj(runtime::getDriver) .flatMap(d -> IntStream.range(0, d.getDeviceCount()).mapToObj(d::getDevice)) From e7774c13728844257594f19649e449f0dee779d4 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sat, 7 Oct 2023 13:58:34 +0100 Subject: [PATCH 5/6] Update changelog for timing and version bump updates --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 605d3273..deba9842 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file. - Thrust managed memory. - HIP managed memory. - New implementation using SYCL2020 USM (sycl2020-acc) and renamed original `sycl2020` to `sycl2020-acc`. 
+- Data initialisation and read-back timing for all models, including Java, Scala, Julia, and Rust +- Add support for the latest Aparapi (3.0.0) and TornadoVM (0.15.x) for Java ### Changed - RAJA CUDA CMake build issues resolved. @@ -17,6 +19,7 @@ All notable changes to this project will be documented in this file. - Number of thread-blocks in CUDA dot kernel implementation changed to 1024. - Fix compatibility of `sycl2020` (now `sycl2020-acc`) with hipSYCL. - Bumped Julia compat to 1.9 +- Bumped Scala to 3.3.1 - Bumped Rust to 1.74.0-nightly (13e6f24b9 2023-09-23) From 3cb01e76a80fb5162a39e27e70b1bdbbce5591a4 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sat, 7 Oct 2023 14:59:26 +0100 Subject: [PATCH 6/6] Add init/read timing for Julia --- src/julia/JuliaStream.jl/src/Stream.jl | 69 +++++++++++++++++++------- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/src/julia/JuliaStream.jl/src/Stream.jl b/src/julia/JuliaStream.jl/src/Stream.jl index 42030f86..226d44b7 100644 --- a/src/julia/JuliaStream.jl/src/Stream.jl +++ b/src/julia/JuliaStream.jl/src/Stream.jl @@ -20,6 +20,18 @@ end @enum Benchmark All Triad Nstream + +function run_init_arrays!(data::StreamData{T,C}, context, init::Tuple{T,T,T})::Float64 where {T,C} + return @elapsed init_arrays!(data, context, init) +end + +function run_read_data(data::StreamData{T,C}, context)::Tuple{Float64,VectorData{T}} where {T,C} + elapsed = @elapsed begin + result = read_data(data, context) + end + return (elapsed, result) +end + function run_all!(data::StreamData{T,C}, context, times::Int)::Tuple{Timings,T} where {T,C} timings = Timings(times) lastSum::T = 0 @@ -39,11 +51,7 @@ function run_triad!(data::StreamData{T,C}, context, times::Int)::Float64 where { end end -function run_nstream!( - data::StreamData{T,C}, - context, - times::Int, -)::Vector{Float64} where {T,C} +function run_nstream!(data::StreamData{T,C}, context, times::Int)::Vector{Float64} where {T,C} timings::Vector{Float64} = zeros(times) for i = 
1:times @inbounds timings[i] = @elapsed nstream!(data, context) @@ -93,9 +101,7 @@ function check_solutions( error = abs((dot - gold_sum) / gold_sum) failed = error > 1.0e-8 if failed - println( - "Validation failed on sum. Error $error \nSum was $dot but should be $gold_sum", - ) + println("Validation failed on sum. Error $error \nSum was $dot but should be $gold_sum") end !failed end : true @@ -166,7 +172,7 @@ function main() parse_options(config) if config.list - for (i, (_,repr, impl)) in enumerate(devices()) + for (i, (_, repr, impl)) in enumerate(devices()) println("[$i] ($impl) $repr") end exit(0) @@ -175,9 +181,7 @@ function main() ds = devices() # TODO implement substring device match if config.device < 1 || config.device > length(ds) - error( - "Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed", - ) + error("Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed") else device = ds[config.device] end @@ -257,16 +261,42 @@ function main() end end + function show_init(init::Float64, read::Float64) + setup = [("Init", init, 3 * array_bytes), ("Read", read, 3 * array_bytes)] + if config.csv + tabulate( + map( + x -> [ + ("phase", x[1]), + ("n_elements", config.arraysize), + ("sizeof", x[3]), + ("max_m$(config.mibibytes ? "i" : "")bytes_per_sec", mega_scale * total_bytes / x[2]), + ("runtime", x[2]), + ], + setup, + )..., + ) + else + for (name, elapsed, total_bytes) in setup + println( + "$name: $(round(elapsed; digits=5)) s (=$(round(( mega_scale * total_bytes) / elapsed; digits = 5)) M$(config.mibibytes ? 
"i" : "")Bytes/sec)", + ) + end + end + end + init::Tuple{type,type,type} = DefaultInit scalar::type = DefaultScalar GC.enable(false) (data, context) = make_stream(config.arraysize, scalar, device, config.csv) - init_arrays!(data, context, init) + tInit = run_init_arrays!(data, context, init) if benchmark == All (timings, sum) = run_all!(data, context, config.numtimes) - valid = check_solutions(read_data(data, context), config.numtimes, init, benchmark, sum) + (tRead, result) = run_read_data(data, context) + show_init(tInit, tRead) + valid = check_solutions(result, config.numtimes, init, benchmark, sum) tabulate( mk_row(timings.copy, "Copy", 2 * array_bytes), mk_row(timings.mul, "Mul", 2 * array_bytes), @@ -276,13 +306,15 @@ function main() ) elseif benchmark == Nstream timings = run_nstream!(data, context, config.numtimes) - valid = - check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing) + (tRead, result) = run_read_data(data, context) + show_init(tInit, tRead) + valid = check_solutions(result, config.numtimes, init, benchmark, nothing) tabulate(mk_row(timings, "Nstream", 4 * array_bytes)) elseif benchmark == Triad elapsed = run_triad!(data, context, config.numtimes) - valid = - check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing) + (tRead, result) = run_read_data(data, context) + show_init(tInit, tRead) + valid = check_solutions(result, config.numtimes, init, benchmark, nothing) total_bytes = 3 * array_bytes * config.numtimes bandwidth = mega_scale * (total_bytes / elapsed) println("Runtime (seconds): $(round(elapsed; digits=5))") @@ -290,7 +322,6 @@ function main() else error("Bad benchmark $(benchmark)") end - GC.enable(true) if !valid