Skip to content

Commit

Permalink
2.1.5
Browse files Browse the repository at this point in the history
  • Loading branch information
liborty committed May 7, 2024
1 parent eb50493 commit e94163d
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 60 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rstats"
version = "2.1.4"
version = "2.1.5"
authors = ["Libor Spacek"]
edition = "2021"
description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.."
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -343,9 +343,11 @@ Methods which take an additional generic vector argument, such as a vector of we

## Appendix: Recent Releases

* **Version 2.1.5** - Added `projection` to trait `VecVec` to project all self vectors to a new basis. This can be used e.g. for Principal Components Analysis data reduction, using some chosen eigenvectors as the new basis.

* **Version 2.1.4** - Tidied up some error processing.

* **Version 2.1.3** - Added `pca_reduction` to `struct TriangMat`. Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize` (should you still need it). `Eigenvectors` solves forward substitution to find each vector.
* **Version 2.1.3** - Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize` (should you still need it). `Eigenvectors` solves forward substitution to find each vector.

* **Version 2.1.2** - Added function `project` to project a `TriangMat` to a lower dimensional space of selected dimensions. Removed `rows` which was a duplicate of `dim`.

Expand Down
6 changes: 4 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,8 @@ pub trait VecVec<T> {
fn covar(self, mid: &[f64]) -> Result<TriangMat, RE>;
/// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
fn serial_covar(self, mid: &[f64]) -> Result<TriangMat, RE>;
/// Projects self onto a given basis, e.g. PCA dimensional reduction.
fn projection(self, basis: &[Vec<f64>]) -> Result<Vec<Vec<f64>>, RE>;
}

/// Methods applicable to slice of vectors of generic end type, plus one other argument
Expand All @@ -367,7 +369,7 @@ pub trait VecVecg<T, U> {
self,
v: &[U],
f: impl Fn(&[T]) -> Result<Vec<f64>, RE>,
) -> Result<(Vec<Vec<f64>>, f64), RE>;
) -> Result<(Vec<Vec<f64>>, f64), RE>;
/// Individually weighted time series derivative of vectors
fn wdvdt(self, ws: &[U], centre: &[f64]) -> Result<Vec<f64>, RE>;
/// 1.0-dotproducts with **v**, in range [0,2]
Expand All @@ -380,7 +382,7 @@ pub trait VecVecg<T, U> {
fn wradii(self, ws: &[U], gm: &[f64]) -> Result<(Vec<f64>, f64), RE>;
/// wmadgm median of weighted radii from (weighted) gm: stable nd data spread estimator
fn wmadgm(self, ws: &[U], wgm: &[f64]) -> Result<f64, RE>;
/// Leftmultiply (column) vector v by (rows of) matrix self
/// Leftmultiply (column) vector v by (rows of) matrix self: projects v to new basis self
fn leftmultv(self, v: &[U]) -> Result<Vec<f64>, RE>;
/// Rightmultiply (row) vector v by (columns of) matrix self
fn rightmultv(self, v: &[U]) -> Result<Vec<f64>, RE>;
Expand Down
92 changes: 42 additions & 50 deletions src/triangmat.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::*; // MStats, MinMax, MutVecg, Stats, VecVec };
pub use indxvec::{printing::*, Printing, Indices, Vecops};
pub use indxvec::{printing::*, Indices, Printing, Vecops};

/// Meanings of 'kind' field. Note that 'Upper Symmetric' would represent the same full matrix as
/// 'Lower Symmetric', so it is not used (lower symmetric matrix is never transposed)
Expand Down Expand Up @@ -43,7 +43,7 @@ impl TriangMat {
/// Squared euclidian vector magnitude (norm) of the data vector
pub fn magsq(&self) -> f64 {
self.data.vmagsq()
}
}
/// Sum of the elements:
/// when applied to the wedge product **a∧b**, returns det(**a,b**)
pub fn sum(&self) -> f64 {
Expand Down Expand Up @@ -90,53 +90,40 @@ impl TriangMat {
/// Normalized full rows from a triangular matrix.
/// When the matrix is symmetric, e.g. a covariance matrix, the result are its normalized eigenvectors
pub fn normalize(&self) -> Vec<Vec<f64>> {
let mut fullcov = self.to_full();
fullcov.iter_mut().for_each(|eigenvector| eigenvector.munit());
let mut fullcov = self.to_full();
fullcov
.iter_mut()
.for_each(|eigenvector| eigenvector.munit());
fullcov
}

/// Normalized eigenvectors of A, given L.
/// Where L is the lower triangular Cholesky decomposition of covariance/comediance matrix for A.
/// Index gives the descending order of eigenvalues.
/// Can be used to order eigenvectors by their relative significance (covariance in their direction).
pub fn eigenvectors(&self) -> Result<(Vec<Vec<f64>>,Vec<usize>),RE> {
if self.is_empty() { return nodata_error("eigenvectors applied to empty L") };
/// Normalized eigenvectors of A=LL'.
/// Where L, supplied as self, is the lower triangular Cholesky decomposition
/// of covariance/comediance matrix for A.
/// Returns `choose` number of eigenvectors corresponding to the largest eigenvalues.
pub fn eigenvectors(&self, choose: usize) -> Result<Vec<Vec<f64>>, RE> {
if self.is_empty() {
return nodata_error("eigenvectors: empty L");
};
let n = self.dim();
let mut evectors = Vec::new();
if choose > n {
return data_error("eigenvectors: choose is more than the number of eigenvectors");
};
let mut eigenvectors = Vec::new();
let eigenvals = self.eigenvalues();
for (rownum,&eval) in eigenvals.iter().enumerate() {
let mut padded_row = self.row(rownum);
for _i in rownum+1..n { padded_row.push(0_f64); };
let mut eigenvec = self
.forward_substitute(&padded_row.smult(eval))?;
for (rownum, &eigenvalue) in eigenvals.iter().enumerate() {
let mut padded_row = self.row(rownum);
for _i in rownum+1..n {
padded_row.push(0_f64);
}
let mut eigenvec = self.forward_substitute(&padded_row.smult(eigenvalue))?;
eigenvec.munit(); // normalize the eigenvector
evectors.push(eigenvec);
};
eigenvectors.push(eigenvec);
}
// descending sort index of eigenvalues
let index = eigenvals
.isort_indexed(0..n, |a, b| b.total_cmp(a));
Ok((evectors,index))
}

/// PCA dimensional reduction using cholesky lower triangular matrix L (as self).
/// Projecting data using only `dimensions` number of eigenvectors,
/// corresponding to the largest eigenvalues.
pub fn pca_reduction(self, data: &[Vec<f64>], dimensions: usize) -> Result<Vec<Vec<f64>>,RE> {
if data.is_empty() { return nodata_error("pca_reduction: empty data") };
let n = data[0].len();
if dimensions > n { return data_error("pca_reduction: new dimensions exceed those of data") };
if self.dim() != n { return data_error("pca_reduction: L and data dimensions mismatch") };
let mut res = Vec::with_capacity(data.len());
let (evecs,mut index) = self.eigenvectors()?;
index.truncate(dimensions);
let pruned_evecs = index.unindex(&evecs, true);
for dvec in data {
res.push(
pruned_evecs
.iter().map(|ev| dvec.dotp(ev))
.collect::<Vec<f64>>())
};
Ok(res)
let mut index = eigenvals.isort_indexed(0..n, |a, b| b.total_cmp(a));
index.truncate(choose); // keep only `choose` best
let pruned = index.unindex(&eigenvectors, true);
Ok(pruned)
}

/// Translates subscripts to a 1d vector, i.e. natural numbers, to a pair of
Expand All @@ -149,18 +136,23 @@ impl TriangMat {
}
/// Project symmetric/antisymmetric triangmat to a smaller one of the same kind,
/// into a subspace specified by an ascending index of dimensions.
/// Deletes all rows and columns of the missing dimensions.
/// Deletes all rows and columns of the missing dimensions.
pub fn project(&self, index: &[usize]) -> Self {
let mut res = Vec::with_capacity(sumn(index.len()));
for &row_idx in index {
let row = self.row(row_idx);
for &column_idx in index {
if column_idx >= row.len() { break; };
if column_idx >= row.len() {
break;
};
res.push(row[column_idx]);
};
};
TriangMat { kind:self.kind, data: res }
}
}
}
TriangMat {
kind: self.kind,
data: res,
}
}

/// Extract one row from TriangMat
pub fn row(&self, r: usize) -> Vec<f64> {
Expand Down Expand Up @@ -289,7 +281,7 @@ impl TriangMat {
/// where `inv()` denotes matrix inverse, which is not explicitly computed.
/// Let `x = inv(L)d` ( and therefore also `x' = d'inv(L')` ).
/// Substituting x into the above definition: `m(d) = sqrt(x'x) = |x|.
/// We obtain x by setting Lx = d and solving by forward substitution.
/// We obtain x by setting Lx = d and solving by forward substitution.
/// All the calculations are done in the compact triangular form.
pub fn mahalanobis<U>(&self, d: &[U]) -> Result<f64, RE>
where
Expand Down
25 changes: 25 additions & 0 deletions src/vecvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -604,4 +604,29 @@ where
Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed
}

/// Projects self onto a given basis, e.g. for PCA dimensional reduction.
/// Each returned vector has length equal to the number of supplied basis vectors.
/// # Errors
/// Returns an error when the data or the basis is empty, when more basis
/// vectors than data dimensions are given, or when their lengths mismatch.
fn projection(self, basis: &[Vec<f64>]) -> Result<Vec<Vec<f64>>, RE> {
    if self.is_empty() {
        return nodata_error("projection: empty data");
    };
    // Guard before indexing basis[0] below, which would otherwise panic
    // on an empty basis slice.
    if basis.is_empty() {
        return nodata_error("projection: empty basis");
    };
    let olddims = self[0].len();
    if basis.len() > olddims {
        return data_error("projection: given too many basis vectors");
    };
    if olddims != basis[0].len() {
        return data_error("projection: lengths of data vectors and basis vectors differ!");
    };
    // Each data vector maps to its dot products with the basis vectors.
    let mut res = Vec::with_capacity(self.len());
    for dvec in self {
        res.push(
            basis
                .iter()
                .map(|ev| dvec.dotp(ev))
                .collect::<Vec<f64>>(),
        )
    }
    Ok(res)
}
}
3 changes: 2 additions & 1 deletion src/vecvecg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
wsum += wf;
Ok(f(s)?.smult(wf))}).collect::<Result<Vec<Vec<f64>>,RE>>()?;
Ok((resvecvec,wsum))
}
}

/// Individually time weighted time series derivative of vectors.
/// Weighted arithmetic mean, minus the centre (geometric median).
Expand Down Expand Up @@ -102,6 +102,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
}

/// Rows of matrix self multiplying (column) vector v
/// Projects vector v onto the new basis given by self
fn leftmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
if self[0].len() != v.len() {
return data_error("leftmultv dimensions mismatch"); };
Expand Down
9 changes: 4 additions & 5 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,10 @@ fn triangmat() -> Result<(), RE> {
mahamag,
mahamag / dmag
);
let (evecs,index) = chol.eigenvectors()?;
println!("Eigenvectors:\n{}Their sort index by eigenvalues:\n{}",
evecs.gr(),index.gr());
println!("Original data PCA reduced to 3 dimensions:\n{}",
chol.pca_reduction(&pts,3)?.gr());
let evecs = chol.eigenvectors(3)?;
println!("Best three unit eigenvectors:\n{}",evecs.gr());
let newdat = pts.projection(&evecs)?;
println!("Original data projected (PCA reduced) onto these eigenvectors:\n{}",newdat.gr());
Ok(())
}

Expand Down

0 comments on commit e94163d

Please sign in to comment.