From 44b244cc1d125da86a96498e4d36baa265805b82 Mon Sep 17 00:00:00 2001
From: Femi <47154698+oojo12@users.noreply.github.com>
Date: Thu, 10 Nov 2022 23:16:46 -0500
Subject: [PATCH] Add Pprof Profiling to Existing Benchmarks (#273)

* add pprof profiling

* add missing config

* target spec dev-dep

* cfg targets

* add missing criterion_main!

* add space for readability

* update Benchmarking Section

* add profiling to all existing benchmarks
---
 CONTRIBUTE.md                                 | 29 +++++++++++++++++++
 algorithms/linfa-clustering/Cargo.toml        |  3 ++
 .../linfa-clustering/benches/appx_dbscan.rs   |  8 ++++-
 algorithms/linfa-clustering/benches/dbscan.rs |  8 ++++-
 .../benches/gaussian_mixture.rs               |  8 ++++-
 .../linfa-clustering/benches/k_means.rs       | 13 ++++++++-
 algorithms/linfa-ftrl/Cargo.toml              |  3 ++
 algorithms/linfa-ftrl/benches/ftrl.rs         | 13 ++++++++-
 algorithms/linfa-ica/Cargo.toml               |  3 ++
 algorithms/linfa-ica/benches/fast_ica.rs      | 12 +++++++-
 algorithms/linfa-linear/Cargo.toml            |  3 ++
 algorithms/linfa-linear/benches/ols_bench.rs  | 10 +++++++
 algorithms/linfa-nn/Cargo.toml                |  3 ++
 algorithms/linfa-nn/benches/nn.rs             |  8 ++++-
 algorithms/linfa-pls/Cargo.toml               |  3 ++
 algorithms/linfa-pls/benches/pls.rs           | 10 +++++++
 algorithms/linfa-trees/Cargo.toml             |  4 ++-
 .../linfa-trees/benches/decision_tree.rs      | 10 +++++++
 18 files changed, 143 insertions(+), 8 deletions(-)

diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
index eaefdbd9e..823794fa2 100644
--- a/CONTRIBUTE.md
+++ b/CONTRIBUTE.md
@@ -156,6 +156,7 @@ let sol = decomp
 
 ## Benchmarking
 
+### Building Benchmarks
 It is important to the project that we have benchmarks in place to evaluate the benefit of performance related changes. To make that process easier we provide some guidelines for writing benchmarks.
 
 1. Test for a variety of sample sizes for most algorithms [1_000, 10_000, 20_000] will be sufficient. For algorithms where it's not too slow, use 100k instead of 20k.
@@ -169,3 +170,31 @@ It is important to the project that we have benchmarks in place to evaluate the
 6. When benchmarking multi-target the target count should be within the following range: [2, 4].
 7. In `BenchmarkId` include the values used to parametrize the benchmark. For example if we're doing Pls then we may have something like `Canonical-Nipals-5feats-1_000samples`
 8. Pass data as an argument to the function being benched. This will prevent Criterion from including data creation time as part of the benchmark.
+9. Add a profiler. See [here](https://github.com/tikv/pprof-rs#integrate-with-criterion) for an example of how to do so with pprof, Criterion, and Flamegraph.
+
+### Running Benchmarks
+When running benchmarks, you will sometimes want to profile the code execution. Assuming you have followed step 9 to add a pprof profiling hook for the linfa-ica package, you can run the following to get your profiling results as a flamegraph:
+
+`cargo bench -p linfa-ica --bench fast_ica -q -- --profile-time 30`
+
+If you are interested in running a regular criterion bench for linfa-ica, then you can run the following:
+
+`cargo bench -p linfa-ica`
+
+### Reporting Benchmark Metrics
+It is important that we have a consistent methodology for reporting benchmarks. Below is a template that should aid reviewers.
+
+```
+### Context
+In a bullet list describe the following:
+1. Run on battery charge or while plugged in
+2. Power saving mode
+3. If the computer was idle during benchmark
+4. If the computer was overheating
+5. Hardware specs
+
+### Bench Command Run
+bench results (code format)
+
+[Attached Flamegraphs if profile runs were also done]
+```
\ No newline at end of file
diff --git a/algorithms/linfa-clustering/Cargo.toml b/algorithms/linfa-clustering/Cargo.toml
index ac1f111a1..47bfa1cfc 100644
--- a/algorithms/linfa-clustering/Cargo.toml
+++ b/algorithms/linfa-clustering/Cargo.toml
@@ -50,6 +50,9 @@ serde_json = "1"
 approx = "0.4"
 lax = "0.15.0"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "k_means"
 harness = false
diff --git a/algorithms/linfa-clustering/benches/appx_dbscan.rs b/algorithms/linfa-clustering/benches/appx_dbscan.rs
index 0de736178..5b477c8be 100644
--- a/algorithms/linfa-clustering/benches/appx_dbscan.rs
+++ b/algorithms/linfa-clustering/benches/appx_dbscan.rs
@@ -9,6 +9,8 @@ use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn appx_dbscan_bench(c: &mut Criterion) {
@@ -48,9 +50,13 @@ fn appx_dbscan_bench(c: &mut Criterion) {
     benchmark.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = appx_dbscan_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(benches, appx_dbscan_bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-clustering/benches/dbscan.rs b/algorithms/linfa-clustering/benches/dbscan.rs
index 7db3c28a1..ea39a93de 100644
--- a/algorithms/linfa-clustering/benches/dbscan.rs
+++ b/algorithms/linfa-clustering/benches/dbscan.rs
@@ -9,6 +9,8 @@ use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn dbscan_bench(c: &mut Criterion) {
@@ -44,9 +46,13 @@ fn dbscan_bench(c: &mut Criterion) {
     benchmark.finish()
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = dbscan_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(benches, dbscan_bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-clustering/benches/gaussian_mixture.rs b/algorithms/linfa-clustering/benches/gaussian_mixture.rs
index ebf0d3d1d..9352b608f 100644
--- a/algorithms/linfa-clustering/benches/gaussian_mixture.rs
+++ b/algorithms/linfa-clustering/benches/gaussian_mixture.rs
@@ -10,6 +10,8 @@ use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn gaussian_mixture_bench(c: &mut Criterion) {
@@ -46,9 +48,13 @@ fn gaussian_mixture_bench(c: &mut Criterion) {
     benchmark.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
   name = benches;
-  config = Criterion::default();
+  config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
   targets = gaussian_mixture_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(benches, gaussian_mixture_bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-clustering/benches/k_means.rs b/algorithms/linfa-clustering/benches/k_means.rs
index e2f29ee04..0e914bd7c 100644
--- a/algorithms/linfa-clustering/benches/k_means.rs
+++ b/algorithms/linfa-clustering/benches/k_means.rs
@@ -9,6 +9,8 @@ use linfa_datasets::generate;
 use ndarray::Array2;
 use ndarray_rand::RandomExt;
 use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform};
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 #[derive(Default)]
@@ -155,9 +157,18 @@ fn k_means_init_bench(c: &mut Criterion) {
     }
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = k_means_bench, k_means_init_bench, k_means_incr_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(
+    benches,
+    k_means_bench,
+    k_means_init_bench,
+    k_means_incr_bench
+);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-ftrl/Cargo.toml b/algorithms/linfa-ftrl/Cargo.toml
index 9a3319f3a..27e8bb022 100644
--- a/algorithms/linfa-ftrl/Cargo.toml
+++ b/algorithms/linfa-ftrl/Cargo.toml
@@ -29,6 +29,9 @@ criterion = "0.4.0"
 approx = "0.4"
 linfa-datasets = { version = "0.6.0", path = "../../datasets", features = ["winequality"] }
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "ftrl"
 harness = false
diff --git a/algorithms/linfa-ftrl/benches/ftrl.rs b/algorithms/linfa-ftrl/benches/ftrl.rs
index ade7322c5..8da8f50a4 100644
--- a/algorithms/linfa-ftrl/benches/ftrl.rs
+++ b/algorithms/linfa-ftrl/benches/ftrl.rs
@@ -7,6 +7,8 @@ use ndarray::{Array1, Array2};
 use ndarray_rand::{
     rand::distributions::Uniform, rand::rngs::SmallRng, rand::SeedableRng, RandomExt,
 };
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 
 fn fit_without_prior_model(c: &mut Criterion) {
     let mut rng = SmallRng::seed_from_u64(42);
@@ -86,9 +88,18 @@ fn get_dataset(
     Dataset::new(features, target)
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = fit_without_prior_model, fit_with_prior_model, predict
 }
+#[cfg(target_os = "windows")]
+criterion_group!(
+    benches,
+    fit_without_prior_model,
+    fit_with_prior_model,
+    predict
+);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-ica/Cargo.toml b/algorithms/linfa-ica/Cargo.toml
index 10f03ed19..cccb79ff7 100644
--- a/algorithms/linfa-ica/Cargo.toml
+++ b/algorithms/linfa-ica/Cargo.toml
@@ -41,6 +41,9 @@ ndarray-npy = { version = "0.8", default-features = false }
 paste = "1.0"
 criterion = "0.4.0"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "fast_ica"
 harness = false
diff --git a/algorithms/linfa-ica/benches/fast_ica.rs b/algorithms/linfa-ica/benches/fast_ica.rs
index 0380bd363..7960484e1 100644
--- a/algorithms/linfa-ica/benches/fast_ica.rs
+++ b/algorithms/linfa-ica/benches/fast_ica.rs
@@ -4,6 +4,8 @@ use linfa_ica::fast_ica::{FastIca, GFunc};
 use ndarray::{array, concatenate};
 use ndarray::{Array, Array2, Axis};
 use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform, RandomExt};
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn perform_ica(size: usize, gfunc: GFunc) {
@@ -11,7 +13,7 @@ fn perform_ica(size: usize, gfunc: GFunc) {
 
     let ica = FastIca::params().gfunc(gfunc).random_state(10);
 
-    let ica = ica.fit(&DatasetBase::from(sources_mixed.view()));
+    ica.fit(&DatasetBase::from(sources_mixed.view())).unwrap();
 }
 
 fn create_data(nsamples: usize) -> Array2<f64> {
@@ -64,5 +66,13 @@ fn bench(c: &mut Criterion) {
     }
 }
 
+#[cfg(not(target_os = "windows"))]
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = bench
+}
+#[cfg(target_os = "windows")]
 criterion_group!(benches, bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-linear/Cargo.toml b/algorithms/linfa-linear/Cargo.toml
index ed35e9485..2c649648f 100644
--- a/algorithms/linfa-linear/Cargo.toml
+++ b/algorithms/linfa-linear/Cargo.toml
@@ -36,6 +36,9 @@ approx = "0.4"
 criterion = "0.4.0" 
 statrs = "0.16.0"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "ols_bench"
 harness = false
diff --git a/algorithms/linfa-linear/benches/ols_bench.rs b/algorithms/linfa-linear/benches/ols_bench.rs
index a0adb3943..13660d290 100644
--- a/algorithms/linfa-linear/benches/ols_bench.rs
+++ b/algorithms/linfa-linear/benches/ols_bench.rs
@@ -4,6 +4,8 @@ use linfa::Dataset;
 use linfa_datasets::generate::make_dataset;
 use linfa_linear::{LinearRegression, TweedieRegressor};
 use ndarray::Ix1;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use statrs::distribution::{DiscreteUniform, Laplace};
 
 #[allow(unused_must_use)]
@@ -57,5 +59,13 @@ fn bench(c: &mut Criterion) {
     group.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = bench
+}
+#[cfg(target_os = "windows")]
 criterion_group!(benches, bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-nn/Cargo.toml b/algorithms/linfa-nn/Cargo.toml
index a9d4b7e35..6003cb76b 100644
--- a/algorithms/linfa-nn/Cargo.toml
+++ b/algorithms/linfa-nn/Cargo.toml
@@ -41,6 +41,9 @@ criterion = "0.4.0"
 rand_xoshiro = "0.6"
 ndarray-rand = "0.14"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "nn"
 harness = false
diff --git a/algorithms/linfa-nn/benches/nn.rs b/algorithms/linfa-nn/benches/nn.rs
index 744f88358..4d5983ed5 100644
--- a/algorithms/linfa-nn/benches/nn.rs
+++ b/algorithms/linfa-nn/benches/nn.rs
@@ -2,6 +2,8 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
 use linfa_nn::{distance::*, CommonNearestNeighbour, NearestNeighbour};
 use ndarray::{Array1, Array2};
 use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform, RandomExt};
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn nn_build_bench(c: &mut Criterion) {
@@ -96,9 +98,13 @@ fn within_range_bench(c: &mut Criterion) {
     }
 }
 
+#[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default();
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = nn_build_bench, k_nearest_bench, within_range_bench
 }
+#[cfg(target_os = "windows")]
+criterion_group!(benches, nn_build_bench, k_nearest_bench, within_range_bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-pls/Cargo.toml b/algorithms/linfa-pls/Cargo.toml
index f44ffc3ff..2fd2af432 100644
--- a/algorithms/linfa-pls/Cargo.toml
+++ b/algorithms/linfa-pls/Cargo.toml
@@ -42,6 +42,9 @@ rand_xoshiro = "0.6"
 criterion = "0.4.0"
 statrs = "0.16.0"
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "pls"
 harness = false
diff --git a/algorithms/linfa-pls/benches/pls.rs b/algorithms/linfa-pls/benches/pls.rs
index aa45e5a59..489ccdba8 100644
--- a/algorithms/linfa-pls/benches/pls.rs
+++ b/algorithms/linfa-pls/benches/pls.rs
@@ -4,6 +4,8 @@ use linfa::Dataset;
 use linfa_datasets::generate::make_dataset;
 use linfa_pls::Algorithm;
 use linfa_pls::{PlsCanonical, PlsCca, PlsRegression};
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use statrs::distribution::{DiscreteUniform, Laplace};
 
 #[allow(unused_must_use)]
@@ -86,5 +88,13 @@ fn bench(c: &mut Criterion) {
     group.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = bench
+}
+#[cfg(target_os = "windows")]
 criterion_group!(benches, bench);
+
 criterion_main!(benches);
diff --git a/algorithms/linfa-trees/Cargo.toml b/algorithms/linfa-trees/Cargo.toml
index fba369d8e..1024c22bb 100644
--- a/algorithms/linfa-trees/Cargo.toml
+++ b/algorithms/linfa-trees/Cargo.toml
@@ -33,9 +33,11 @@ linfa = { version = "0.6.0", path = "../.." }
 rand = { version = "0.8", features = ["small_rng"] }
 criterion = "0.4.0"
 approx = "0.4"
-
 linfa-datasets = { version = "0.6.0", path = "../../datasets/", features = ["iris"] }
 
+[target.'cfg(not(windows))'.dev-dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+
 [[bench]]
 name = "decision_tree"
 harness = false
diff --git a/algorithms/linfa-trees/benches/decision_tree.rs b/algorithms/linfa-trees/benches/decision_tree.rs
index 2b7cd484f..da420f34b 100644
--- a/algorithms/linfa-trees/benches/decision_tree.rs
+++ b/algorithms/linfa-trees/benches/decision_tree.rs
@@ -5,6 +5,8 @@ use ndarray::{concatenate, Array, Array1, Array2, Axis};
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::{StandardNormal, Uniform};
 use ndarray_rand::RandomExt;
+#[cfg(not(target_os = "windows"))]
+use pprof::criterion::{Output, PProfProfiler};
 use rand::rngs::SmallRng;
 
 fn generate_blobs(means: &Array2<f64>, samples: usize, mut rng: &mut SmallRng) -> Array2<f64> {
@@ -52,5 +54,13 @@ fn decision_tree_bench(c: &mut Criterion) {
     group.finish();
 }
 
+#[cfg(not(target_os = "windows"))]
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = decision_tree_bench
+}
+#[cfg(target_os = "windows")]
 criterion_group!(benches, decision_tree_bench);
+
 criterion_main!(benches);