From e94163d43d0a41b98b85843c6168bfa3f761252b Mon Sep 17 00:00:00 2001 From: L <457124+liborty@users.noreply.github.com> Date: Tue, 7 May 2024 23:50:10 +1000 Subject: [PATCH] 2.1.5 --- Cargo.toml | 2 +- README.md | 4 ++- src/lib.rs | 6 ++-- src/triangmat.rs | 92 ++++++++++++++++++++++-------------------------- src/vecvec.rs | 25 +++++++++++++ src/vecvecg.rs | 3 +- tests/tests.rs | 9 +++-- 7 files changed, 81 insertions(+), 60 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0e08bac..89d604c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rstats" -version = "2.1.4" +version = "2.1.5" authors = ["Libor Spacek"] edition = "2021" description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.." diff --git a/README.md b/README.md index a93f24d..fc6c6af 100644 --- a/README.md +++ b/README.md @@ -343,9 +343,11 @@ Methods which take an additional generic vector argument, such as a vector of we ## Appendix: Recent Releases +* **Version 2.1.5** - Added `projection` to trait `VecVecg` to project all self vectors to a new basis. This can be used e.g. for Principal Components Analysis data reduction, using some chosen eigenvectors as the new basis. + * **Version 2.1.4** - Tidied up some error processing. -* **Version 2.1.3** - Added `pca_reduction` to `struct TriangMat`. Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize` (should you still need it). `Eigenvectors` solves forward substitution to find each vector. +* **Version 2.1.3** - Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize` (should you still need it). `Eigenvectors` solves forward substitution to find each vector. * **Version 2.1.2** - Added function `project` to project a `TriangMat` to a lower dimensional space of selected dimensions. Removed `rows` which was a duplicate of `dim`. diff --git a/src/lib.rs b/src/lib.rs index cec96d0..7125510 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -351,6 +351,8 @@ pub trait VecVec { fn covar(self, mid: &[f64]) -> Result; /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors. fn serial_covar(self, mid: &[f64]) -> Result; + /// Projects self onto a given basis, e.g. PCA dimensional reduction. + fn projection(self, basis: &[Vec]) -> Result>, RE>; } /// Methods applicable to slice of vectors of generic end type, plus one other argument @@ -367,7 +369,7 @@ pub trait VecVecg { self, v: &[U], f: impl Fn(&[T]) -> Result, RE>, - ) -> Result<(Vec>, f64), RE>; + ) -> Result<(Vec>, f64), RE>; /// Individually weighted time series derivative of vectors fn wdvdt(self, ws: &[U], centre: &[f64]) -> Result, RE>; /// 1.0-dotproducts with **v**, in range [0,2] @@ -380,7 +382,7 @@ pub trait VecVecg { fn wradii(self, ws: &[U], gm: &[f64]) -> Result<(Vec, f64), RE>; /// wmadgm median of weighted radii from (weighted) gm: stable nd data spread estimator fn wmadgm(self, ws: &[U], wgm: &[f64]) -> Result; - /// Leftmultiply (column) vector v by (rows of) matrix self + /// Leftmultiply (column) vector v by (rows of) matrix self: projects v to new basis self fn leftmultv(self, v: &[U]) -> Result, RE>; /// Rightmultiply (row) vector v by (columns of) matrix self fn rightmultv(self, v: &[U]) -> Result, RE>; diff --git a/src/triangmat.rs b/src/triangmat.rs index 8419e4a..48a55f5 100644 --- a/src/triangmat.rs +++ b/src/triangmat.rs @@ -1,5 +1,5 @@ use crate::*; // MStats, MinMax, MutVecg, Stats, VecVec }; -pub use indxvec::{printing::*, Printing, Indices, Vecops}; +pub use indxvec::{printing::*, Indices, Printing, Vecops}; /// Meanings of 'kind' field. Note that 'Upper Symmetric' would represent the same full matrix as /// 'Lower Symmetric', so it is not used (lower symmetric matrix is never transposed) @@ -43,7 +43,7 @@ impl TriangMat { /// Squared euclidian vector magnitude (norm) of the data vector pub fn magsq(&self) -> f64 { self.data.vmagsq() - } + } /// Sum of the elements: /// when applied to the wedge product **a∧b**, returns det(**a,b**) pub fn sum(&self) -> f64 { @@ -90,53 +90,40 @@ impl TriangMat { /// Normalized full rows from a triangular matrix. /// When the matrix is symmetric, e.g. a covariance matrix, the result are its normalized eigenvectors pub fn normalize(&self) -> Vec> { - let mut fullcov = self.to_full(); - fullcov.iter_mut().for_each(|eigenvector| eigenvector.munit()); + let mut fullcov = self.to_full(); + fullcov + .iter_mut() + .for_each(|eigenvector| eigenvector.munit()); fullcov } - - /// Normalized eigenvectors of A, given L. - /// Where L is the lower triangular Cholesky decomposition of covariance/comediance matrix for A. - /// Index gives the descending order of eigenvalues. - /// Can be used to order eigenvectors by their relative significance (covariance in their direction). - pub fn eigenvectors(&self) -> Result<(Vec>,Vec),RE> { - if self.is_empty() { return nodata_error("eigenvectors applied to empty L") }; + /// Normalized eigenvectors of A=LL'. + /// Where L, supplied as self, is the lower triangular Cholesky decomposition + /// of covariance/comediance matrix for A. + /// Returns `choose` number of eigenvectors corresponding to the largest eigenvalues. + pub fn eigenvectors(&self, choose: usize) -> Result>, RE> { + if self.is_empty() { + return nodata_error("eigenvectors: empty L"); + }; let n = self.dim(); - let mut evectors = Vec::new(); + if choose > n { + return data_error("eigenvectors: choose is more than the number of eigenvectors"); + }; + let mut eigenvectors = Vec::new(); let eigenvals = self.eigenvalues(); - for (rownum,&eval) in eigenvals.iter().enumerate() { - let mut padded_row = self.row(rownum); - for _i in rownum+1..n { padded_row.push(0_f64); }; - let mut eigenvec = self - .forward_substitute(&padded_row.smult(eval))?; + for (rownum, &eigenvalue) in eigenvals.iter().enumerate() { + let mut padded_row = self.row(rownum); + for _i in rownum+1..n { + padded_row.push(0_f64); + } + let mut eigenvec = self.forward_substitute(&padded_row.smult(eigenvalue))?; eigenvec.munit(); // normalize the eigenvector - evectors.push(eigenvec); - }; + eigenvectors.push(eigenvec); + } // descending sort index of eigenvalues - let index = eigenvals - .isort_indexed(0..n, |a, b| b.total_cmp(a)); - Ok((evectors,index)) - } - - /// PCA dimensional reduction using cholesky lower triangular matrix L (as self). - /// Projecting data using only `dimensions` number of eigenvectors, - /// corresponding to the largest eigenvalues. - pub fn pca_reduction(self, data: &[Vec], dimensions: usize) -> Result>,RE> { - if data.is_empty() { return nodata_error("pca_reduction: empty data") }; - let n = data[0].len(); - if dimensions > n { return data_error("pca_reduction: new dimensions exceed those of data") }; - if self.dim() != n { return data_error("pca_reduction: L and data dimensions mismatch") }; - let mut res = Vec::with_capacity(data.len()); - let (evecs,mut index) = self.eigenvectors()?; - index.truncate(dimensions); - let pruned_evecs = index.unindex(&evecs, true); - for dvec in data { - res.push( - pruned_evecs - .iter().map(|ev| dvec.dotp(ev)) - .collect::>()) - }; - Ok(res) + let mut index = eigenvals.isort_indexed(0..n, |a, b| b.total_cmp(a)); + index.truncate(choose); // keep only `choose` best + let pruned = index.unindex(&eigenvectors, true); + Ok(pruned) } /// Translates subscripts to a 1d vector, i.e. natural numbers, to a pair of @@ -149,18 +136,23 @@ impl TriangMat { } /// Project symmetric/antisymmetric triangmat to a smaller one of the same kind, /// into a subspace specified by an ascending index of dimensions. - /// Deletes all rows and columns of the missing dimensions. + /// Deletes all rows and columns of the missing dimensions. pub fn project(&self, index: &[usize]) -> Self { let mut res = Vec::with_capacity(sumn(index.len())); for &row_idx in index { let row = self.row(row_idx); for &column_idx in index { - if column_idx >= row.len() { break; }; + if column_idx >= row.len() { + break; + }; res.push(row[column_idx]); - }; - }; - TriangMat { kind:self.kind, data: res } - } + } + } + TriangMat { + kind: self.kind, + data: res, + } + } /// Extract one row from TriangMat pub fn row(&self, r: usize) -> Vec { @@ -289,7 +281,7 @@ impl TriangMat { /// where `inv()` denotes matrix inverse, which is not explicitly computed. /// Let `x = inv(L)d` ( and therefore also `x' = d'inv(L')` ). /// Substituting x into the above definition: `m(d) = sqrt(x'x) = |x|. - /// We obtain x by setting Lx = d and solving by forward substitution. + /// We obtain x by setting Lx = d and solving by forward substitution. /// All the calculations are done in the compact triangular form. pub fn mahalanobis(&self, d: &[U]) -> Result where diff --git a/src/vecvec.rs b/src/vecvec.rs index 0e0084c..8bda636 100644 --- a/src/vecvec.rs +++ b/src/vecvec.rs @@ -604,4 +604,29 @@ where Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed } + /// Projects self onto a given basis, e.g. PCA dimensional reduction + /// The returned vectors will have lengths equal to the number of supplied basis vectors. + fn projection(self, basis: &[Vec]) -> Result>, RE> + { + if self.is_empty() { + return nodata_error("projection: empty data"); + }; + let olddims = self[0].len(); + if basis.len() > olddims { + return data_error("projection: given too many basis vectors"); + }; + if olddims != basis[0].len() { + return data_error("projection: lengths of data vectors and basis vectors differ!"); + }; + let mut res = Vec::with_capacity(self.len()); + for dvec in self { + res.push( + basis + .iter() + .map(|ev| dvec.dotp(ev)) + .collect::>(), + ) + }; + Ok(res) + } } diff --git a/src/vecvecg.rs b/src/vecvecg.rs index 6153ddd..9ab9f01 100644 --- a/src/vecvecg.rs +++ b/src/vecvecg.rs @@ -33,7 +33,7 @@ impl VecVecg for &[Vec] wsum += wf; Ok(f(s)?.smult(wf))}).collect::>,RE>>()?; Ok((resvecvec,wsum)) - } + } /// Individually time weighted time series derivative of vectors. /// Weighted arithmetic mean, minus the centre (geometric median). @@ -102,6 +102,7 @@ impl VecVecg for &[Vec] } /// Rows of matrix self multiplying (column) vector v + /// Projects vector v onto the new basis given by self fn leftmultv(self,v: &[U]) -> Result,RE> { if self[0].len() != v.len() { return data_error("leftmultv dimensions mismatch"); }; diff --git a/tests/tests.rs b/tests/tests.rs index a34c907..c242b19 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -268,11 +268,10 @@ fn triangmat() -> Result<(), RE> { mahamag, mahamag / dmag ); - let (evecs,index) = chol.eigenvectors()?; - println!("Eigenvectors:\n{}Their sort index by eigenvalues:\n{}", - evecs.gr(),index.gr()); - println!("Original data PCA reduced to 3 dimensions:\n{}", - chol.pca_reduction(&pts,3)?.gr()); + let evecs = chol.eigenvectors(3)?; + println!("Best three unit eigenvectors:\n{}",evecs.gr()); + let newdat = pts.projection(&evecs)?; + println!("Original data projected (PCA reduced) onto these eigenvectors:\n{}",newdat.gr()); Ok(()) }