From 3fd76cb7a1f020f19e00280a4b1345893304b15e Mon Sep 17 00:00:00 2001
From: Hoppe
Date: Mon, 7 Oct 2024 18:12:17 +0200
Subject: [PATCH] unified data sizes for all benchmarks

---
 benchmarks/cb/linalg.py        |  5 +--
 benchmarks/cb/manipulations.py | 79 ++++++++++++++++++++++------------
 benchmarks/cb/preprocessing.py | 16 +++++--
 benchmarks/cb/sizes.py         | 16 ++++++-
 4 files changed, 80 insertions(+), 36 deletions(-)

diff --git a/benchmarks/cb/linalg.py b/benchmarks/cb/linalg.py
index e9c08220f8..0cb393da63 100644
--- a/benchmarks/cb/linalg.py
+++ b/benchmarks/cb/linalg.py
@@ -2,7 +2,7 @@
 import heat as ht
 from mpi4py import MPI
 from perun import monitor
-from sizes import GSIZE_TS_L, GSIZE_TS_S, GSIZE_SQ
+from sizes import GSIZE_TS_L, GSIZE_TS_S, GSIZE_SQ, LANCZOS_SIZE
 
 """
 Benchmarks in this file:
@@ -99,8 +99,7 @@ def run_linalg_benchmarks():
     qr_split_1(a_1)
     del a_1
 
-    n = 1000
-    A = ht.random.random((n, n), dtype=ht.float64, split=0)
+    A = ht.random.random((LANCZOS_SIZE, LANCZOS_SIZE), dtype=ht.float64, split=0)
     B = A @ A.T
     lanczos(B)
     del A, B
diff --git a/benchmarks/cb/manipulations.py b/benchmarks/cb/manipulations.py
index 0fde87cff9..72adc9446a 100644
--- a/benchmarks/cb/manipulations.py
+++ b/benchmarks/cb/manipulations.py
@@ -2,49 +2,72 @@
 import heat as ht
 from typing import List
 from perun import monitor
+from sizes import GSIZE_SQ, GSIZE_CB
+
+"""
+Benchmarks so far:
+- concatenation along split axis
+- reshaping along split axis with new_split
+- resplitting (of a split array)
+- unsplit a split array
+"""
 
 
 @monitor()
 def concatenate(arrays):
-    # benchmark concatenation of 3 arrays with split 1, None, 1 respectively
     a = ht.concatenate(arrays, axis=1)
 
 
 @monitor()
-def reshape(arrays):
-    for array in arrays:
-        a = ht.reshape(array, (10000000, -1), new_split=1)
+def concatenate_nosplit(arrays):
+    a = ht.concatenate(arrays, axis=1)
+
+
+@monitor()
+def reshape(array):
+    a = ht.reshape(array, (array.shape[0] * array.shape[1], -1), new_split=1)
+
+
+@monitor()
+def 
reshape_nosplit(array):
+    a = ht.reshape(array, (array.shape[0] * array.shape[1], -1), new_split=1)
 
 
 @monitor()
-def resplit(array, new_split: List[int | None]):
-    for new_split in new_split:
-        a = ht.resplit(array, axis=new_split)
-        del a
+def resplit(array):
+    a = ht.resplit(array, axis=1)
+
+
+@monitor()
+def unsplit(array):
+    a = ht.resplit(array, axis=None)
 
 
 def run_manipulation_benchmarks():
-    sizes = [10000, 20000, 40000]
-    arrays = []
-    for size in sizes:
-        arrays.append(ht.zeros((1000, size), split=1))
+    arrays = [
+        ht.zeros((GSIZE_SQ // 2, GSIZE_SQ), split=1),
+        ht.zeros((GSIZE_SQ // 2, GSIZE_SQ), split=1),
+    ]
+    concatenate(arrays)
+    del arrays
+
+    arrays = [
+        ht.zeros((GSIZE_SQ // 2, GSIZE_SQ), split=0),
+        ht.zeros((GSIZE_SQ // 2, GSIZE_SQ), split=0),
+    ]
+    concatenate_nosplit(arrays)
+    del arrays
+
+    array = ht.zeros((GSIZE_CB, GSIZE_CB, GSIZE_CB), split=0)
-    reshape(arrays)
+    reshape(array)
+    del array
 
-    arrays = []
-    for i, size in enumerate(sizes):
-        if i == 1:
-            split = None
-        else:
-            split = 1
-        arrays.append(ht.zeros((1000, size), split=split))
-    concatenate(arrays)
+    array = ht.zeros((GSIZE_CB, GSIZE_CB, GSIZE_CB), split=2)
+    reshape_nosplit(array)
+    del array
 
-    if ht.comm.size > 1:
-        shape = [100, 50, 50, 20, 86]
-        n_elements = ht.array(shape).prod().item()
-        mem = n_elements * 4 / 1e9
-        array = ht.reshape(ht.arange(0, n_elements, split=0, dtype=ht.float32), shape) * (
-            ht.comm.rank + 1
-        )
+    array = ht.ones((GSIZE_SQ, GSIZE_SQ), split=0)
+    resplit(array)
 
-        resplit(array, [None, 2, 4])
+    array = ht.ones((GSIZE_SQ, GSIZE_SQ), split=0)
+    unsplit(array)
diff --git a/benchmarks/cb/preprocessing.py b/benchmarks/cb/preprocessing.py
index ebd75ed71c..869f441ae3 100644
--- a/benchmarks/cb/preprocessing.py
+++ b/benchmarks/cb/preprocessing.py
@@ -2,9 +2,17 @@
 import heat as ht
 from mpi4py import MPI
 from perun import monitor
+from sizes import GSIZE_TS_L, GSIZE_TS_S
 
-# we benchmark the in-place versions (`copy=False`) of the preprocessing functions
-# for each function, 
both the forward and the inverse transformation are applied +""" +Benchmarks in this file: +- StandardScaler and inverse_transform +- MinMaxScaler and inverse_transform +- MaxAbsScaler and inverse_transform +- RobustScaler and inverse_transform +- Normalizer (without inverse, of course) +All of them are both fit_transform and inverse_transform (together); data is split along the data axis. +""" @monitor() @@ -42,8 +50,8 @@ def apply_inplace_normalizer(X): def run_preprocessing_benchmarks(): - n_data_points = 5000 - n_features = 50 + n_data_points = GSIZE_TS_L + n_features = GSIZE_TS_S X = ht.random.randn(n_data_points, n_features, split=0) apply_inplace_standard_scaler_and_inverse(X) diff --git a/benchmarks/cb/sizes.py b/benchmarks/cb/sizes.py index 1533c82144..7ae4fed5a7 100644 --- a/benchmarks/cb/sizes.py +++ b/benchmarks/cb/sizes.py @@ -10,7 +10,14 @@ TS_FACTOR_loc = 2 vTS_FACTOR_loc = 4 -# all other variables are calculated based on the number of elements per process +""" +all other variables are calculated based on the number of elements per process +shape of a 2D square array: (GSIZE_SQ, GSIZE_SQ) +shape of a 3D cube array: (GSIZE_CB, GSIZE_CB, GSIZE_CB) +shape of a 2D tall-skinny array: (GSIZE_TS_L, GSIZE_TS_S) +shape of a 2D very tall-skinny array: (GSIZE_vTS_L, GSIZE_vTS_S) +similar for short-fat and very short-fat arrays... +""" n_procs = ht.MPI_WORLD.size N_ELEMENTS_TOTAL = N_ELEMENTS_PER_PROC * n_procs @@ -26,3 +33,10 @@ (N_ELEMENTS_TOTAL / vTS_FACTOR_GLOB) ** 0.5 ) # short dimension of very tall-skinny matrix GSIZE_vTS_L = GSIZE_TS_S * vTS_FACTOR_GLOB + 1 # long dimension of very tall-skinny matrix + +GSIZE_CB = int(N_ELEMENTS_TOTAL ** (1 / 3)) # dimension of a cube array + +""" +Exceptions needed for the moment: +""" +LANCZOS_SIZE = 2**10