From e94163d43d0a41b98b85843c6168bfa3f761252b Mon Sep 17 00:00:00 2001
From: L <457124+liborty@users.noreply.github.com>
Date: Tue, 7 May 2024 23:50:10 +1000
Subject: [PATCH] 2.1.5

---
 Cargo.toml       |  2 +-
 README.md        |  4 ++-
 src/lib.rs       |  6 ++--
 src/triangmat.rs | 92 ++++++++++++++++++++++--------------------------
 src/vecvec.rs    | 25 +++++++++++++
 src/vecvecg.rs   |  3 +-
 tests/tests.rs   |  9 +++--
 7 files changed, 81 insertions(+), 60 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 0e08bac..89d604c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rstats"
-version = "2.1.4"
+version = "2.1.5"
 authors = ["Libor Spacek"]
 edition = "2021"
 description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.."
diff --git a/README.md b/README.md
index a93f24d..fc6c6af 100644
--- a/README.md
+++ b/README.md
@@ -343,9 +343,11 @@ Methods which take an additional generic vector argument, such as a vector of we
 
 ## Appendix: Recent Releases
 
+* **Version 2.1.5** - Added `projection` to trait `VecVecg` to project all self vectors to a new basis. This can be used e.g. for Principal Components Analysis data reduction, using some chosen eigenvectors as the new basis.
+
 * **Version 2.1.4** - Tidied up some error processing.
 
-* **Version 2.1.3** - Added `pca_reduction` to `struct TriangMat`. Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize` (should you still need it). `Eigenvectors` solves forward substitution to find each vector.
+* **Version 2.1.3** - Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize` (should you still need it). `Eigenvectors` solves forward substitution to find each vector.
 
 * **Version 2.1.2** - Added function `project` to project a `TriangMat` to a lower dimensional space of selected dimensions. Removed `rows` which was a duplicate of `dim`.
 
diff --git a/src/lib.rs b/src/lib.rs
index cec96d0..7125510 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -351,6 +351,8 @@ pub trait VecVec<T> {
     fn covar(self, mid: &[f64]) -> Result<TriangMat, RE>;
     /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
     fn serial_covar(self, mid: &[f64]) -> Result<TriangMat, RE>;
+    /// Projects self onto a given basis, e.g. PCA dimensional reduction. 
+    fn projection(self, basis: &[Vec<f64>]) -> Result<Vec<Vec<f64>>, RE>;
 }
 
 /// Methods applicable to slice of vectors of generic end type, plus one other argument
@@ -367,7 +369,7 @@ pub trait VecVecg<T, U> {
         self,
         v: &[U],
         f: impl Fn(&[T]) -> Result<Vec<f64>, RE>,
-    ) -> Result<(Vec<Vec<f64>>, f64), RE>;
+    ) -> Result<(Vec<Vec<f64>>, f64), RE>; 
     /// Individually weighted time series derivative of vectors
     fn wdvdt(self, ws: &[U], centre: &[f64]) -> Result<Vec<f64>, RE>;
     /// 1.0-dotproducts with **v**, in range [0,2]
@@ -380,7 +382,7 @@ pub trait VecVecg<T, U> {
     fn wradii(self, ws: &[U], gm: &[f64]) -> Result<(Vec<f64>, f64), RE>;
     /// wmadgm median of weighted radii from (weighted) gm: stable nd data spread estimator
     fn wmadgm(self, ws: &[U], wgm: &[f64]) -> Result<f64, RE>;
-    /// Leftmultiply (column) vector v by (rows of) matrix self
+    /// Leftmultiply (column) vector v by (rows of) matrix self: projects v to new basis self
     fn leftmultv(self, v: &[U]) -> Result<Vec<f64>, RE>;
     /// Rightmultiply (row) vector v by (columns of) matrix self
     fn rightmultv(self, v: &[U]) -> Result<Vec<f64>, RE>;
diff --git a/src/triangmat.rs b/src/triangmat.rs
index 8419e4a..48a55f5 100644
--- a/src/triangmat.rs
+++ b/src/triangmat.rs
@@ -1,5 +1,5 @@
 use crate::*; // MStats, MinMax, MutVecg, Stats, VecVec };
-pub use indxvec::{printing::*, Printing, Indices, Vecops};
+pub use indxvec::{printing::*, Indices, Printing, Vecops};
 
 /// Meanings of 'kind' field. Note that 'Upper Symmetric' would represent the same full matrix as
 /// 'Lower Symmetric', so it is not used (lower symmetric matrix is never transposed)
@@ -43,7 +43,7 @@ impl TriangMat {
     /// Squared euclidian vector magnitude (norm) of the data vector
     pub fn magsq(&self) -> f64 {
         self.data.vmagsq()
-    }    
+    }
     /// Sum of the elements:  
     /// when applied to the wedge product **a∧b**, returns det(**a,b**)
     pub fn sum(&self) -> f64 {
@@ -90,53 +90,40 @@ impl TriangMat {
     /// Normalized full rows from a triangular matrix.
     /// When the matrix is symmetric, e.g. a covariance matrix, the result are its normalized eigenvectors
     pub fn normalize(&self) -> Vec<Vec<f64>> {
-        let mut fullcov = self.to_full(); 
-        fullcov.iter_mut().for_each(|eigenvector| eigenvector.munit());
+        let mut fullcov = self.to_full();
+        fullcov
+            .iter_mut()
+            .for_each(|eigenvector| eigenvector.munit());
         fullcov
     }
-
-    /// Normalized eigenvectors of A, given L.
-    /// Where L is the lower triangular Cholesky decomposition of covariance/comediance matrix for A.
-    /// Index gives the descending order of eigenvalues.
-    /// Can be used to order eigenvectors by their relative significance (covariance in their direction).
-    pub fn eigenvectors(&self) -> Result<(Vec<Vec<f64>>,Vec<usize>),RE> {
-        if self.is_empty() { return nodata_error("eigenvectors applied to empty L") };
+    /// Normalized eigenvectors of A=LL'.
+    /// Where L, supplied as self, is the lower triangular Cholesky decomposition 
+    /// of covariance/comediance matrix for A.
+    /// Returns `choose` number of eigenvectors corresponding to the largest eigenvalues.
+    pub fn eigenvectors(&self, choose: usize) -> Result<Vec<Vec<f64>>, RE> {
+        if self.is_empty() {
+            return nodata_error("eigenvectors: empty L");
+        };
         let n = self.dim();
-        let mut evectors = Vec::new();
+        if choose > n {
+            return data_error("eigenvectors: choose is more than the number of eigenvectors");
+        };
+        let mut eigenvectors = Vec::new();
         let eigenvals = self.eigenvalues();
-        for (rownum,&eval) in eigenvals.iter().enumerate() { 
-            let mut padded_row = self.row(rownum);  
-            for _i in rownum+1..n { padded_row.push(0_f64); };
-            let mut eigenvec = self
-                .forward_substitute(&padded_row.smult(eval))?;
+        for (rownum, &eigenvalue) in eigenvals.iter().enumerate() {
+            let mut padded_row = self.row(rownum);
+            for _i in rownum+1..n {
+                padded_row.push(0_f64);
+            }
+            let mut eigenvec = self.forward_substitute(&padded_row.smult(eigenvalue))?;
             eigenvec.munit(); // normalize the eigenvector
-            evectors.push(eigenvec);
-        };
+            eigenvectors.push(eigenvec);
+        }
         // descending sort index of eigenvalues
-        let index = eigenvals
-            .isort_indexed(0..n, |a, b| b.total_cmp(a));
-        Ok((evectors,index))
-    }
-
-    /// PCA dimensional reduction using cholesky lower triangular matrix L (as self).
-    /// Projecting data using only `dimensions` number of eigenvectors,
-    /// corresponding to the largest eigenvalues.
-    pub fn pca_reduction(self, data: &[Vec<f64>], dimensions: usize) -> Result<Vec<Vec<f64>>,RE> {
-        if data.is_empty() { return nodata_error("pca_reduction: empty data") };
-        let n = data[0].len();        
-        if dimensions > n { return data_error("pca_reduction: new dimensions exceed those of data") };
-        if self.dim() != n { return data_error("pca_reduction: L and data dimensions mismatch") };
-        let mut res = Vec::with_capacity(data.len());
-        let (evecs,mut index) = self.eigenvectors()?; 
-        index.truncate(dimensions);
-        let pruned_evecs = index.unindex(&evecs, true); 
-        for dvec in data {
-            res.push( 
-                pruned_evecs
-                .iter().map(|ev| dvec.dotp(ev))
-            .collect::<Vec<f64>>())
-            };
-        Ok(res)   
+        let mut index = eigenvals.isort_indexed(0..n, |a, b| b.total_cmp(a));
+        index.truncate(choose); // keep only `choose` best
+        let pruned = index.unindex(&eigenvectors, true);
+        Ok(pruned)
     }
 
     /// Translates subscripts to a 1d vector, i.e. natural numbers, to a pair of
@@ -149,18 +136,23 @@ impl TriangMat {
     }
     /// Project symmetric/antisymmetric triangmat to a smaller one of the same kind,
     /// into a subspace specified by an ascending index of dimensions.
-    /// Deletes all rows and columns of the missing dimensions. 
+    /// Deletes all rows and columns of the missing dimensions.
     pub fn project(&self, index: &[usize]) -> Self {
         let mut res = Vec::with_capacity(sumn(index.len()));
         for &row_idx in index {
             let row = self.row(row_idx);
             for &column_idx in index {
-                if column_idx >= row.len() { break; };
+                if column_idx >= row.len() {
+                    break;
+                };
                 res.push(row[column_idx]);
-            };
-        };
-        TriangMat { kind:self.kind, data: res }   
-    }    
+            }
+        }
+        TriangMat {
+            kind: self.kind,
+            data: res,
+        }
+    }
 
     /// Extract one row from TriangMat
     pub fn row(&self, r: usize) -> Vec<f64> {
@@ -289,7 +281,7 @@ impl TriangMat {
     /// where `inv()` denotes matrix inverse, which is not explicitly computed.  
     /// Let  `x = inv(L)d` ( and therefore also  `x' = d'inv(L')` ).
     /// Substituting x into the above definition: `m(d) = sqrt(x'x) = |x|.  
-    /// We obtain x by setting Lx = d and solving by forward substitution. 
+    /// We obtain x by setting Lx = d and solving by forward substitution.
     /// All the calculations are done in the compact triangular form.
     pub fn mahalanobis<U>(&self, d: &[U]) -> Result<f64, RE>
     where
diff --git a/src/vecvec.rs b/src/vecvec.rs
index 0e0084c..8bda636 100644
--- a/src/vecvec.rs
+++ b/src/vecvec.rs
@@ -604,4 +604,29 @@ where
         Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed
     }
 
+    /// Projects self onto a given basis, e.g. PCA dimensional reduction  
+    /// The returned vectors will have lengths equal to the number of supplied basis vectors.
+    fn projection(self, basis: &[Vec<f64>]) -> Result<Vec<Vec<f64>>, RE>
+    {
+        if self.is_empty() {
+            return nodata_error("projection: empty data");
+        };
+        let olddims = self[0].len();
+        if basis.len() > olddims { 
+            return data_error("projection: given too many basis vectors");
+        };
+        if olddims != basis[0].len() {
+            return data_error("projection: lengths of data vectors and basis vectors differ!");
+        };
+        let mut res = Vec::with_capacity(self.len()); 
+        for dvec in self {
+            res.push(
+                basis
+                    .iter()
+                    .map(|ev| dvec.dotp(ev))
+                    .collect::<Vec<f64>>(),
+            )
+        };
+        Ok(res)
+    }
 }
diff --git a/src/vecvecg.rs b/src/vecvecg.rs
index 6153ddd..9ab9f01 100644
--- a/src/vecvecg.rs
+++ b/src/vecvecg.rs
@@ -33,7 +33,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
             wsum += wf;            
             Ok(f(s)?.smult(wf))}).collect::<Result<Vec<Vec<f64>>,RE>>()?;
         Ok((resvecvec,wsum))
-    } 
+    }
 
     /// Individually time weighted time series derivative of vectors.
     /// Weighted arithmetic mean, minus the centre (geometric median). 
@@ -102,6 +102,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     }
 
     /// Rows of matrix self multiplying (column) vector v
+    /// Projects vector v onto the new basis given by self
     fn leftmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
         if self[0].len() != v.len() { 
             return data_error("leftmultv dimensions mismatch"); };
diff --git a/tests/tests.rs b/tests/tests.rs
index a34c907..c242b19 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -268,11 +268,10 @@ fn triangmat() -> Result<(), RE> {
         mahamag,
         mahamag / dmag
     );
-    let (evecs,index) = chol.eigenvectors()?;
-    println!("Eigenvectors:\n{}Their sort index by eigenvalues:\n{}",
-        evecs.gr(),index.gr());
-    println!("Original data PCA reduced to 3 dimensions:\n{}",
-        chol.pca_reduction(&pts,3)?.gr());
+    let evecs = chol.eigenvectors(3)?;
+    println!("Best three unit eigenvectors:\n{}",evecs.gr());
+    let newdat = pts.projection(&evecs)?; 
+    println!("Original data projected (PCA reduced) onto these eigenvectors:\n{}",newdat.gr());
     Ok(())
 }