-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Hoppe
committed
Oct 7, 2024
1 parent
e013b99
commit 5fc46fa
Showing
3 changed files
with
82 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,56 @@ | ||
import heat as ht | ||
import torch | ||
from perun import monitor | ||
from sizes import GSIZE_vTS_S, GSIZE_vTS_L | ||
|
||
""" | ||
For clustering we assume very tall skinny data | ||
Benchmarks in this file: | ||
- K-Means (with kmeans++ initialization) | ||
- K-Medians (with kmedians++ initialization) | ||
- K-Medoids (with kmedoids++ initialization) | ||
- BatchParallelKMeans (with k-means++ initialization) | ||
""" | ||
|
||
N_CLUSTERS_TO_FIND = 4 | ||
|
||
|
||
@monitor() | ||
def kmeans(data): | ||
kmeans = ht.cluster.KMeans(n_clusters=4, init="kmeans++") | ||
kmeans = ht.cluster.KMeans(n_clusters=N_CLUSTERS_TO_FIND, init="kmeans++") | ||
kmeans.fit(data) | ||
|
||
|
||
@monitor() | ||
def kmedians(data): | ||
kmeans = ht.cluster.KMedians(n_clusters=4, init="kmedians++") | ||
kmeans = ht.cluster.KMedians(n_clusters=N_CLUSTERS_TO_FIND, init="kmedians++") | ||
kmeans.fit(data) | ||
|
||
|
||
@monitor() | ||
def kmedoids(data): | ||
kmeans = ht.cluster.KMedoids(n_clusters=4, init="kmedoids++") | ||
kmeans = ht.cluster.KMedoids(n_clusters=N_CLUSTERS_TO_FIND, init="kmedoids++") | ||
kmeans.fit(data) | ||
|
||
|
||
@monitor() | ||
def batchparallel_kmeans(data): | ||
bpkmeans = ht.cluster.BatchParallelKMeans(n_clusters=N_CLUSTERS_TO_FIND, init="k-means++") | ||
bpkmeans.fit(data) | ||
|
||
|
||
def run_cluster_benchmarks(): | ||
n = 5000 | ||
seed = 1 | ||
data = ht.utils.data.spherical.create_spherical_dataset( | ||
num_samples_cluster=n, radius=1.0, offset=4.0, dtype=ht.float32, random_state=seed | ||
# N_CLUSTERS_TO_FIND many spherical clusters, "centers" are uniformly distributed in a hypercube [-5,5]^d | ||
# each cluster is normally distributed with std=1 | ||
data = ht.utils.data.spherical.create_clusters( | ||
GSIZE_vTS_L, | ||
GSIZE_vTS_S, | ||
N_CLUSTERS_TO_FIND, | ||
10 * (torch.rand.rand(N_CLUSTERS_TO_FIND, GSIZE_vTS_S) - 1), | ||
1, | ||
) | ||
|
||
kmeans(data) | ||
kmedians(data) | ||
kmedoids(data) | ||
batchparallel_kmeans(data) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import heat as ht | ||
|
||
""" | ||
The following variables can be changed: | ||
- N_ELEMENTS_PER_PROC: number of elements per process | ||
- TS_FACTOR_loc: tall-skinny factor for each process (the long dimension of the local array of a tall-skinny matrix is TS_FACTOR_loc times as large as the short dimension) | ||
- vTS_FACTOR_loc: very tall-skinny factor for each process (same as before, but for "very" tall-skinny matrices) | ||
""" | ||
N_ELEMENTS_PER_PROC = 2**30 | ||
TS_FACTOR_loc = 2 | ||
vTS_FACTOR_loc = 4 | ||
|
||
# all other variables are calculated based on the number of elements per process | ||
n_procs = ht.MPI_WORLD.size | ||
N_ELEMENTS_TOTAL = N_ELEMENTS_PER_PROC * n_procs | ||
|
||
GSIZE_SQ = int(N_ELEMENTS_TOTAL**0.5) | ||
TS_FACTOR_GLOB = TS_FACTOR_loc * n_procs # global tall-skinny factor | ||
GSIZE_TS_S = int( | ||
(N_ELEMENTS_TOTAL / TS_FACTOR_GLOB) ** 0.5 | ||
) # short dimension of tall-skinny matrix | ||
GSIZE_TS_L = GSIZE_TS_S * TS_FACTOR_GLOB + 1 # long dimension of tall-skinny matrix | ||
|
||
vTS_FACTOR_GLOB = vTS_FACTOR_loc * n_procs # global very tall-skinny factor | ||
GSIZE_vTS_S = int( | ||
(N_ELEMENTS_TOTAL / vTS_FACTOR_GLOB) ** 0.5 | ||
) # short dimension of very tall-skinny matrix | ||
GSIZE_vTS_L = GSIZE_TS_S * vTS_FACTOR_GLOB + 1 # long dimension of very tall-skinny matrix |