Reduce heap allocations
JayKickliter committed Oct 14, 2020
1 parent 6508532 commit 232f479
Showing 4 changed files with 108 additions and 114 deletions.
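The unifying change in this commit is to pass borrowed ndarray views (`ArrayView1`/`ArrayView2`) instead of owned arrays (`Array1`/`Array2`), so callers no longer clone their data just to hand it to the SOM. A minimal sketch of that pattern (the `sum_owned`/`sum_view` functions below are hypothetical illustrations, not code from this crate):

    use ndarray::{Array1, ArrayView1};

    // Before: the function takes ownership, so the caller must clone
    // (a heap allocation) or give up its array.
    fn sum_owned(v: Array1<f64>) -> f64 {
        v.sum()
    }

    // After: the function borrows a view; the call site allocates nothing.
    fn sum_view(v: ArrayView1<f64>) -> f64 {
        v.sum()
    }

    fn main() {
        let data = Array1::from(vec![1.0, 2.0, 3.0]);
        let a = sum_owned(data.clone()); // copies the whole buffer
        let b = sum_view(data.view());   // cheap borrow, no copy
        assert_eq!(a, b);
    }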
12 changes: 6 additions & 6 deletions benches/som.rs
@@ -1,5 +1,5 @@
use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
use ndarray::{arr1, arr2};
use ndarray::{arr2, ArrayView1};
use rand::{thread_rng, Rng};
use rusticsom::SOM;

@@ -15,7 +15,7 @@ fn training(c: &mut Criterion) {
|| {
(
SOM::create(10, 10, 4, false, None, None, None, None),
data.clone(),
data.view(),
)
},
|(mut map, data)| map.train_random(data, iterations),
@@ -28,7 +28,7 @@ fn training(c: &mut Criterion) {
|| {
(
SOM::create(10, 10, 4, false, None, None, None, None),
data.clone(),
data.view(),
)
},
|(mut map, data)| map.train_batch(data, iterations),
@@ -43,19 +43,19 @@ fn winner(c: &mut Criterion) {
let mut group = c.benchmark_group("Winner");

let mut map = SOM::create(10, 10, 4, false, None, None, None, None);
map.train_random(data, 1000);
map.train_random(data.view(), 1000);

group.bench_function(BenchmarkId::new("Plain", 4), |b| {
b.iter_batched(
|| arr1(&IRIS[thread_rng().gen_range::<usize>(0, IRIS.len())]),
|| ArrayView1::from(&IRIS[thread_rng().gen_range::<usize>(0, IRIS.len())]),
|elem| map.winner(elem),
BatchSize::SmallInput,
);
});

group.bench_function(BenchmarkId::new("Distance", 4), |b| {
b.iter_batched(
|| arr1(&IRIS[thread_rng().gen_range::<usize>(0, IRIS.len())]),
|| ArrayView1::from(&IRIS[thread_rng().gen_range::<usize>(0, IRIS.len())]),
|elem| map.winner_dist(elem),
BatchSize::SmallInput,
);
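In the benchmarks above, the setup closures now produce `data.view()` and `ArrayView1::from(...)` instead of cloned arrays; a view is only a pointer plus shape/stride metadata, so handing one out per iteration stays off the heap. A small sketch under the assumption of a static row table like `IRIS`:

    use ndarray::ArrayView1;

    // Hypothetical stand-in for the benchmark's IRIS table.
    static ROWS: [[f64; 4]; 2] = [[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2]];

    fn main() {
        // Borrow one fixed-size row as a 1-D view; nothing is allocated here.
        let v: ArrayView1<f64> = ArrayView1::from(&ROWS[0][..]);
        assert_eq!(v.len(), 4);
    }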
162 changes: 82 additions & 80 deletions src/lib.rs
@@ -20,11 +20,12 @@ pub struct SomData {
}

/// A function for determining neighbours' weights.
pub type NeighbourhoodFn = fn((usize, usize), (usize, usize), f32) -> Array2<f64>;
pub type NeighbourhoodFn = fn((usize, usize), (usize, usize), f64) -> Array2<f64>;

/// A function for decaying `learning_rate` and `sigma`.
pub type DecayFn = fn(f32, u32, u32) -> f64;

#[derive(Debug, Clone)]
pub struct SOM {
data: SomData,
decay_fn: DecayFn,
@@ -72,48 +72,35 @@ impl SOM {
}
}

// To find and return the position of the winner neuron for a given input sample.
//
// TODO: (breaking-change) switch `elem` to `ArrayView1`. See todo
// for `Self::winner_dist()`.
pub fn winner(&mut self, elem: Array1<f64>) -> (usize, usize) {
let mut temp: Array1<f64> = Array1::<f64>::zeros(self.data.z);
/// Returns the `(x, y)` position of the winning neuron for `elem`.
pub fn winner(&mut self, elem: ArrayView1<f64>) -> (usize, usize) {
let mut min: f64 = std::f64::MAX;
let mut ret: (usize, usize) = (0, 0);

for i in 0..self.data.x {
for j in 0..self.data.y {
for k in 0..self.data.z {
temp[k] = self.data.map[[i, j, k]] - elem[[k]];
}

let norm = norm(temp.view());

for (i, row) in self.data.map.outer_iter().enumerate() {
for (j, entry) in row.outer_iter().enumerate() {
let norm = norm2(entry, elem);
if norm < min {
min = norm;
ret = (i, j);
}
}
}

if let Some(elem) = self.data.activation_map.get_mut(ret) {
*(elem) += 1;
}

self.data.activation_map[ret] += 1;
ret
}

// Update the weights of the SOM
fn update(&mut self, elem: Array1<f64>, winner: (usize, usize), iteration_index: u32) {
fn update(&mut self, elem: ArrayView1<f64>, winner: (usize, usize), iteration_index: u32) {
let new_lr = (self.decay_fn)(
self.data.learning_rate,
iteration_index,
self.data.regulate_lrate,
);
let new_sig = (self.decay_fn)(self.data.sigma, iteration_index, self.data.regulate_lrate);

let g =
(self.neighbourhood_fn)((self.data.x, self.data.y), winner, new_sig as f32) * new_lr;
let g = (self.neighbourhood_fn)((self.data.x, self.data.y), winner, new_sig) * new_lr;

for i in 0..self.data.x {
for j in 0..self.data.y {
@@ -130,40 +118,34 @@ impl SOM {
}

// Trains the SOM by picking random data points as inputs from the dataset
pub fn train_random(&mut self, data: Array2<f64>, iterations: u32) {
let mut random_value: i32;
let mut temp1: Array1<f64>;
let mut temp2: Array1<f64>;
pub fn train_random(&mut self, data: ArrayView2<f64>, iterations: u32) {
let (rows, cols) = data.dim();
let rnd_row = || rand::thread_rng().gen_range(0, rows);
self.update_regulate_lrate(iterations);
// TODO: temporary heap allocation undesirable, but not easily
// avoidable.
let mut randomized_col = Array1::<f64>::zeros(cols);
for iteration in 0..iterations {
temp1 = Array1::<f64>::zeros(ndarray::ArrayBase::dim(&data).1);
temp2 = Array1::<f64>::zeros(ndarray::ArrayBase::dim(&data).1);
random_value = rand::thread_rng().gen_range(0, ndarray::ArrayBase::dim(&data).0 as i32);
for i in 0..ndarray::ArrayBase::dim(&data).1 {
temp1[i] = data[[random_value as usize, i]];
temp2[i] = data[[random_value as usize, i]];
// TODO: Does the algorithm require us to operate over
// columns like this? It is not optimal from a
// cache-coherency POV.
for data_col in data.axis_iter(Axis(1)) {
ndarray::Zip::from(&mut randomized_col)
.apply(|rc_elem| *rc_elem = data_col[rnd_row()]);
}
let win = self.winner(temp1);
self.update(temp2, win, iteration);
let win = self.winner(randomized_col.view());
self.update(randomized_col.view(), win, iteration);
}
}

// Trains the SOM by picking data points in batches (sequentially) as inputs from the dataset
pub fn train_batch(&mut self, data: Array2<f64>, iterations: u32) {
let mut index: u32;
let mut temp1: Array1<f64>;
let mut temp2: Array1<f64>;
self.update_regulate_lrate(ndarray::ArrayBase::dim(&data).0 as u32 * iterations);
pub fn train_batch(&mut self, data: ArrayView2<f64>, iterations: u32) {
self.update_regulate_lrate(data.nrows() as u32 * iterations);
for iteration in 0..iterations {
temp1 = Array1::<f64>::zeros(ndarray::ArrayBase::dim(&data).1);
temp2 = Array1::<f64>::zeros(ndarray::ArrayBase::dim(&data).1);
index = iteration % (ndarray::ArrayBase::dim(&data).0 - 1) as u32;
for i in 0..ndarray::ArrayBase::dim(&data).1 {
temp1[i] = data[[index as usize, i]];
temp2[i] = data[[index as usize, i]];
}
let win = self.winner(temp1);
self.update(temp2, win, iteration);
let index = iteration as usize % (data.nrows() - 1);
let col = data.index_axis(Axis(0), index);
let win = self.winner(col);
self.update(col, win, iteration);
}
}
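`train_batch` now reads each sample with `data.index_axis(Axis(0), index)`, which returns a borrowed row view, so the old per-iteration `temp1`/`temp2` buffers are gone. A tiny illustration of that ndarray call (the 2x3 matrix is an arbitrary example, not data from this crate):

    use ndarray::{arr2, Axis};

    fn main() {
        let data = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        // Borrow row 1 as an ArrayView1; no copy of the data is made.
        let row = data.index_axis(Axis(0), 1);
        assert_eq!(row.to_vec(), vec![4.0, 5.0, 6.0]);
    }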

@@ -177,30 +159,20 @@ impl SOM {
self.data.activation_map.view()
}

// Similar to winner(), but also returns distance of input sample from winner neuron.
//
// TODO: (breaking-change) make `elem` an `ArrayView1` to remove
// at least one heap allocation. Requires same change to
// `Self::winner()`.
//
pub fn winner_dist(&mut self, elem: Array1<f64>) -> ((usize, usize), f64) {
// TODO: use more descriptive names than temp[..]
let tempelem = elem.clone();
let temp = self.winner(elem);

(
temp,
euclid_dist(
self.data
.map
.index_axis(Axis(0), temp.0)
.index_axis(Axis(0), temp.1),
tempelem.view(),
),
)
/// Returns a tuple of the winner and its distance from `elem`.
pub fn winner_dist(&mut self, elem: ArrayView1<f64>) -> ((usize, usize), f64) {
let winner = self.winner(elem);
let distance = euclid_dist(
self.data
.map
.index_axis(Axis(0), winner.0)
.index_axis(Axis(0), winner.1),
elem,
);
(winner, distance)
}

// Returns size of SOM.
/// Returns `self`'s size in `(rows, cols)`.
pub fn get_size(&self) -> (usize, usize) {
(self.data.x, self.data.y)
}
@@ -227,6 +199,14 @@ impl SOM {
dist_map[[i, j]] = temp_dist;
}
}

// A bug in an earlier version of this commit led to every
// entry being divided by 0.0, which silently results in NaN
// rather than a process signal. This was made worse by the
// integration tests not comparing results against reference
// values. Let's assert here for the time being.
debug_assert!(max_dist.abs() > 0.0);

for i in 0..self.data.x {
for j in 0..self.data.y {
dist_map[[i, j]] /= max_dist;
@@ -288,9 +268,25 @@ impl SOM {
}
}

// Returns the 2-norm of a vector represented as a 1D ArrayView
fn norm(a: ArrayView1<f64>) -> f64 {
a.iter().map(|elem| elem.powi(2)).sum::<f64>().sqrt()
// Returns the 2-norm of a vector.
fn norm<'a, I>(iti: I) -> f64
where
I: IntoIterator<Item = &'a f64>,
{
iti.into_iter().map(|elem| elem.powi(2)).sum::<f64>().sqrt()
}

// Returns the 2-norm of the difference of two vectors (their Euclidean distance).
fn norm2<'l, 'r, L, R>(lhs: L, rhs: R) -> f64
where
L: IntoIterator<Item = &'l f64>,
R: IntoIterator<Item = &'r f64>,
{
lhs.into_iter()
.zip(rhs.into_iter())
.map(|(l, r)| (l - r) * (l - r))
.sum::<f64>()
.sqrt()
}
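`norm2` fuses the element-wise subtraction with the 2-norm, which is what lets `winner` drop the temporary difference vector it used to allocate per neuron. A usage sketch, assuming `norm2` is in scope (for example, from a unit test inside this module):

    let a = [1.0_f64, 0.0, 0.0];
    let b = [0.0_f64, 1.0, 0.0];
    // sqrt((1 - 0)^2 + (0 - 1)^2 + (0 - 0)^2) = sqrt(2)
    let d = norm2(&a, &b);
    assert!((d - 2.0_f64.sqrt()).abs() < 1e-12);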

// The default decay function for LR and Sigma
@@ -301,15 +297,21 @@ fn default_decay_fn(val: f32, curr_iter: u32, max_iter: u32) -> f64 {
/// Default neighborhood function.
///
/// Returns a two-dimensional Gaussian distribution centered at `pos`.
fn gaussian(dims: (usize, usize), pos: (usize, usize), sigma: f32) -> Array2<f64> {
let div = 2.0 * PI * (sigma as f64).powi(2);
fn gaussian(dims: (usize, usize), pos: (usize, usize), sigma: f64) -> Array2<f64> {
let div = 2.0 * PI * sigma * sigma;
debug_assert!(div.abs() > f64::EPSILON);

let shape_fn = |(i, j)| {
let x = (-((i as f64 - (pos.0 as f64)).powi(2) / div)).exp();
let y = (-((j as f64 - (pos.1 as f64)).powi(2) / div)).exp();
let x = ((i as f64 - pos.0 as f64) * (i as f64 - pos.0 as f64) / -div).exp();
let y = ((j as f64 - pos.1 as f64) * (j as f64 - pos.1 as f64) / -div).exp();
x * y
};

// This allocation is fine. Benchmarking shows that eliminating
// it by making `NeighbourhoodFn` lazy (computing a single array
// element at a time instead of returning the whole array)
// actually causes a performance regression of greater than 12%.
Array2::from_shape_fn(dims, shape_fn)
}
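The comment above mentions a lazy variant of `NeighbourhoodFn` that was tried and rejected. Roughly, "lazy" means returning something that computes one weight on demand instead of materialising the whole `Array2`; a hypothetical sketch (not the code that was actually benchmarked) just to make the rejected alternative concrete:

    // Hypothetical lazy form: hand back a closure instead of an Array2.
    // Per the comment above, an approach along these lines benchmarked
    // more than 12% slower than building the array eagerly.
    fn gaussian_lazy(pos: (usize, usize), sigma: f64) -> impl Fn(usize, usize) -> f64 {
        let div = 2.0 * std::f64::consts::PI * sigma * sigma;
        move |i, j| {
            let x = ((i as f64 - pos.0 as f64).powi(2) / -div).exp();
            let y = ((j as f64 - pos.1 as f64).powi(2) / -div).exp();
            x * y
        }
    }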

@@ -321,7 +323,7 @@ fn euclid_dist(a: ArrayView1<f64>, b: ArrayView1<f64>) -> f64 {

a.iter()
.zip(b.iter())
.map(|(a, b)| (a - b).powi(2))
.map(|(a, b)| (a - b) * (a - b))
.sum::<f64>()
.sqrt()
}
@@ -344,8 +346,8 @@ mod tests {
assert_eq!(map.get_map_cell((1, 1, k)), 1.5);
}

assert_eq!(map.winner(Array1::from(vec![1.5; 5])), (1, 1));
assert_eq!(map.winner(Array1::from(vec![0.5; 5])), (0, 0));
assert_eq!(map.winner(ArrayView1::from(&vec![1.5; 5])), (1, 1));
assert_eq!(map.winner(ArrayView1::from(&vec![0.5; 5])), (0, 0));
}

#[test]
31 changes: 13 additions & 18 deletions tests/test.rs
@@ -1,24 +1,21 @@
extern crate ndarray;
extern crate rusticsom;

use approx::assert_relative_eq;
use ndarray::{Array1, Array2};
use ndarray::{Array2, ArrayView1};
use rusticsom::*;
mod data;

#[test]
fn t_test_som() {
let mut map = SOM::create(2, 3, 5, false, Some(0.1), None, None, None);

assert_eq!(map.winner(Array1::from(vec![0.5; 5])), (0, 0));
assert_eq!(map.winner(ArrayView1::from(&[0.5; 5])), (0, 0));

let mut temp_train = Array2::<f64>::zeros((2, 5));
for i in temp_train.iter_mut() {
*i = 1.0;
}

map.train_batch(temp_train, 1);
assert_eq!(map.winner(Array1::from(vec![0.5; 5])), (0, 1));
map.train_batch(temp_train.view(), 1);
assert_eq!(map.winner(ArrayView1::from(&[0.5; 5])), (0, 1));
}

#[test]
@@ -35,7 +32,7 @@ fn t_distance_map() {
*i = 1.0;
}

map.train_batch(temp_train, 1);
map.train_batch(temp_train.view(), 1);
let dist = map.distance_map();

assert_ne!(dist[[0, 0]], 0.0);
@@ -48,15 +45,14 @@ fn t_full_test_random() {
// Plotted with Matplotlib
let mut map = SOM::create(10, 10, 4, false, None, None, None, None);
let data = ndarray::arr2(&data::IRIS);
let data2 = data.to_owned();
map.train_random(data, 1000);

map.train_random(data.view(), 1000);

let dist_map = map.distance_map();
println!("{:?}", dist_map);

for x in data2.genrows() {
let y = x.to_owned();
print!("{:?}, ", map.winner(y));
for x in data.genrows() {
print!("{:?}, ", map.winner(x));
}
}

@@ -66,15 +62,14 @@ fn t_full_test_batch() {
// Plotted with Matplotlib
let mut map = SOM::create(10, 10, 4, false, None, None, None, None);
let data = ndarray::arr2(&data::IRIS);
let data2 = data.to_owned();
map.train_batch(data, 1000);

map.train_batch(data.view(), 1000);

let dist_map = map.distance_map();
assert_relative_eq!(dist_map, ndarray::arr2(&data::IRIS_BATCH_DISTANCES));
println!("{:?}", dist_map);

for x in data2.genrows() {
let y = x.to_owned();
print!("{:?}, ", map.winner(y));
for x in data.genrows() {
print!("{:?}, ", map.winner(x));
}
}