started working on benchmarks

helmholtz-analytics · Oct 7, 2024 · 5fc46fa · 5fc46fa
1 parent e013b99
commit 5fc46fa
Show file tree

Hide file tree

Showing 3 changed files with 82 additions and 15 deletions.
diff --git a/benchmarks/cb/cluster.py b/benchmarks/cb/cluster.py
@@ -1,32 +1,56 @@
 import heat as ht
+import torch
 from perun import monitor
+from sizes import GSIZE_vTS_S, GSIZE_vTS_L
+
+"""
+For clustering we assume very tall skinny data
+Benchmarks in this file:
+- K-Means (with kmeans++ initialization)
+- K-Medians (with kmedians++ initialization)
+- K-Medoids (with kmedoids++ initialization)
+- BatchParallelKMeans (with k-means++ initialization)
+"""
+
+N_CLUSTERS_TO_FIND = 4
 
 
 @monitor()
 def kmeans(data):
-    kmeans = ht.cluster.KMeans(n_clusters=4, init="kmeans++")
+    kmeans = ht.cluster.KMeans(n_clusters=N_CLUSTERS_TO_FIND, init="kmeans++")
     kmeans.fit(data)
 
 
 @monitor()
 def kmedians(data):
-    kmeans = ht.cluster.KMedians(n_clusters=4, init="kmedians++")
+    kmeans = ht.cluster.KMedians(n_clusters=N_CLUSTERS_TO_FIND, init="kmedians++")
     kmeans.fit(data)
 
 
 @monitor()
 def kmedoids(data):
-    kmeans = ht.cluster.KMedoids(n_clusters=4, init="kmedoids++")
+    kmeans = ht.cluster.KMedoids(n_clusters=N_CLUSTERS_TO_FIND, init="kmedoids++")
     kmeans.fit(data)
 
 
+@monitor()
+def batchparallel_kmeans(data):
+    bpkmeans = ht.cluster.BatchParallelKMeans(n_clusters=N_CLUSTERS_TO_FIND, init="k-means++")
+    bpkmeans.fit(data)
+
+
 def run_cluster_benchmarks():
-    n = 5000
-    seed = 1
-    data = ht.utils.data.spherical.create_spherical_dataset(
-        num_samples_cluster=n, radius=1.0, offset=4.0, dtype=ht.float32, random_state=seed
+    # N_CLUSTERS_TO_FIND spherical clusters; torch.rand is uniform on [0, 1), so 10 * (rand - 0.5) puts the centers in [-5, 5)^d
+    # each cluster is normally distributed with std=1
+    data = ht.utils.data.spherical.create_clusters(
+        GSIZE_vTS_L,
+        GSIZE_vTS_S,
+        N_CLUSTERS_TO_FIND,
+        10 * (torch.rand(N_CLUSTERS_TO_FIND, GSIZE_vTS_S) - 0.5),
+        1,
     )
 
     kmeans(data)
     kmedians(data)
     kmedoids(data)
+    batchparallel_kmeans(data)
diff --git a/benchmarks/cb/linalg.py b/benchmarks/cb/linalg.py
@@ -2,6 +2,18 @@
 import heat as ht
 from mpi4py import MPI
 from perun import monitor
+from sizes import GSIZE_TS_L, GSIZE_TS_S, GSIZE_SQ
+
+"""
+Benchmarks in this file:
+- matrix-matrix multiplication of square matrices with different splits (00,11,01,10)
+- QR decomposition of
+    - tall-skinny matrix with split=0
+    - square matrix with split=1
+- Lanczos algorithm on a square matrix of size 1000 x 1000
+- Hierarchical SVD with fixed rank or fixed tolerance for a short-fat matrix of rank 10 and split=1
+- Full SVD with tall-skinny matrix and split=0
+"""
 
 
 @monitor()
@@ -46,8 +58,7 @@ def hierachical_svd_tol(data, tol):
 
 @monitor()
 def svd_full_ts(data):
-    svd = ht.linalg.svd(data, compute_uv=True)
-    ### TODO: go on here!
+    svd = ht.linalg.svd(data)
 
 
 @monitor()
@@ -56,7 +67,7 @@ def lanczos(B):
 
 
 def run_linalg_benchmarks():
-    n = 3000
+    n = GSIZE_SQ
     a = ht.random.random((n, n), split=0)
     b = ht.random.random((n, n), split=0)
     matmul_split_0(a, b)
@@ -77,26 +88,30 @@ def run_linalg_benchmarks():
     matmul_split_10(a, b)
     del a, b
 
-    n = int((4000000 // MPI.COMM_WORLD.size) ** 0.5)
-    m = MPI.COMM_WORLD.size * n
+    n = GSIZE_TS_S
+    m = GSIZE_TS_L
     a_0 = ht.random.random((m, n), split=0)
     qr_split_0(a_0)
     del a_0
 
-    n = 2000
+    n = GSIZE_SQ
     a_1 = ht.random.random((n, n), split=1)
     qr_split_1(a_1)
     del a_1
 
-    n = 50
+    n = 1000
     A = ht.random.random((n, n), dtype=ht.float64, split=0)
     B = A @ A.T
     lanczos(B)
     del A, B
 
     data = ht.utils.data.matrixgallery.random_known_rank(
-        1000, 500 * MPI.COMM_WORLD.Get_size(), 10, split=1, dtype=ht.float32
+        GSIZE_TS_S, GSIZE_TS_L, 10, split=1, dtype=ht.float32
     )[0]
     hierachical_svd_rank(data, 10)
     hierachical_svd_tol(data, 1e-2)
     del data
+
+    data = ht.random.random((GSIZE_TS_L, GSIZE_TS_S), split=0)
+    svd_full_ts(data)
+    del data
diff --git a/benchmarks/cb/sizes.py b/benchmarks/cb/sizes.py
@@ -0,0 +1,28 @@
+import heat as ht
+
+"""
+The following variables can be changed:
+- N_ELEMENTS_PER_PROC: number of elements per process
+- TS_FACTOR_loc: tall-skinny factor for each process (long dimension of local array in tall-skinny matrix is TS_FACTOR_loc times larger than the short dimension)
+- vTS_FACTOR_loc: very tall-skinny factor for each process (same as before, but for "very" tall-skinny matrices)
+"""
+N_ELEMENTS_PER_PROC = 2**30
+TS_FACTOR_loc = 2
+vTS_FACTOR_loc = 4
+
+# all other variables are calculated based on the number of elements per process
+n_procs = ht.MPI_WORLD.size
+N_ELEMENTS_TOTAL = N_ELEMENTS_PER_PROC * n_procs
+
+GSIZE_SQ = int(N_ELEMENTS_TOTAL**0.5)  # side length of square matrix
+TS_FACTOR_GLOB = TS_FACTOR_loc * n_procs  # global tall-skinny factor
+GSIZE_TS_S = int(
+    (N_ELEMENTS_TOTAL / TS_FACTOR_GLOB) ** 0.5
+)  # short dimension of tall-skinny matrix
+GSIZE_TS_L = GSIZE_TS_S * TS_FACTOR_GLOB + 1  # long dimension of tall-skinny matrix
+
+vTS_FACTOR_GLOB = vTS_FACTOR_loc * n_procs  # global very tall-skinny factor
+GSIZE_vTS_S = int(
+    (N_ELEMENTS_TOTAL / vTS_FACTOR_GLOB) ** 0.5
+)  # short dimension of very tall-skinny matrix
+GSIZE_vTS_L = GSIZE_vTS_S * vTS_FACTOR_GLOB + 1  # long dimension of very tall-skinny matrix