2.0.11

liborty · Apr 24, 2024 · 0d5ef58 · 0d5ef58
1 parent 9824988
commit 0d5ef58
Show file tree

Hide file tree

Showing 7 changed files with 87 additions and 35 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rstats"
-version = "2.0.10"
+version = "2.0.11"
 authors = ["Libor Spacek"]
 edition = "2021"
 description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.."

diff --git a/README.md b/README.md
@@ -326,7 +326,9 @@ Methods which take an additional generic vector argument, such as a vector of we
 
 ## Appendix: Recent Releases
 
-* **Version 2.0.10** - Added to struct TriangMat `eigenvectors` (enabling PCA) and `variances` which compute variances of the data cloud along the original axes, by projecting on them and summing the eigenvalue weighted principal components.
+* **Version 2.0.11** - removed not so useful `variances`. Tidied up error processing in `vecveg.rs`. Added to it `serial_covar` and `serial_wcovar` for when heavy loading of all the cores may not be wanted.
+
+* **Version 2.0.10** - Added to struct TriangMat `eigenvectors` (enabling PCA).
 
 * **Version 2.0.9** - Pruned some rarely used methods, simplified `gmparts` and `gmerror`, updated dependencies.
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -407,7 +407,11 @@ pub trait VecVecg<T, U> {
     /// Like `wgmedian` but returns also the sum of reciprocals.
     fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec<f64>, f64), RE>;
     /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
+    fn serial_covar(self, mid:&[U]) -> Result<TriangMat,RE>;
+    /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
     fn covar(self, med: &[U]) -> Result<TriangMat, RE>;
     /// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
+    fn serial_wcovar(self, ws: &[U], m: &[U]) -> Result<TriangMat, RE>;
+    /// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
     fn wcovar(self, ws: &[U], m: &[f64]) -> Result<TriangMat, RE>;
 }
diff --git a/src/triangmat.rs b/src/triangmat.rs
@@ -100,17 +100,7 @@ impl TriangMat {
         fullcov.iter_mut().for_each(|eigenvector| eigenvector.munit());
         fullcov
     }
-    /// Independent variances along the original axes (dimensions),  
-    /// from triangular covariance matrix. 
-    pub fn variances(&self) -> Result< Vec<f64>, RE > {
-        let eigenvalues = self.cholesky()?.eigenvalues();
-        let eigenvectors = self.eigenvectors();
-        let mut result = vec![0_f64;eigenvalues.len()];
-        eigenvectors.iter().zip(eigenvalues).for_each(
-            |(eigenvector, eigenvalue)| result.mutvadd(&eigenvector.smult(eigenvalue)));
-        Ok(result)
-    }
-
+
     /// Translates subscripts to a 1d vector, i.e. natural numbers, to a pair of
     /// (row,column) coordinates within a lower/upper triangular matrix.
     /// Enables memory efficient representation of triangular matrices as one flat vector.

diff --git a/src/vecvec.rs b/src/vecvec.rs
@@ -359,7 +359,7 @@ where
     }
 
     /// Likelihood of zero median point **p** belonging to zero median data cloud `self`,
-    /// based on the cloud's shape outside of normal plane through **p**. 
+    /// based on the points outside of normal plane through **p**. 
     /// Returns the sum of unit vectors of its outside points, projected onto unit **p**. 
     /// Index should be in the descending order of magnitudes of self points (for efficiency).
     fn depth(self, descending_index: &[usize], p: &[f64]) -> Result<f64,RE> {

diff --git a/src/vecvecg.rs b/src/vecvecg.rs
@@ -40,7 +40,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     fn wdvdt(self, ws: &[U], centre: &[f64]) -> Result<Vec<f64>, RE> {
         let len = self.len();
         if len < 2 {
-            return re_error("NoDataError","time series too short: {len}");
+            return re_error("empty","time series too short: {len}");
         };
         let mut weightsum:f64 = ws[0].clone().into();
         let mut sumv:Vec<f64> = self[0].smult(weightsum);
@@ -104,14 +104,14 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     /// Rows of matrix self multiplying (column) vector v
     fn leftmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
         if self[0].len() != v.len() { 
-            return re_error("DataError","leftmultv dimensions mismatch")?; };
+            return re_error("size","leftmultv dimensions mismatch")?; };
         Ok(self.iter().map(|s| s.dotp(v)).collect())
     }
 
     /// Row vector v multipying columns of matrix self
     fn rightmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
         if v.len() != self.len() { 
-            return re_error("DataError","rightmultv dimensions mismatch")?; }; 
+            return re_error("size","rightmultv dimensions mismatch")?; }; 
         Ok((0..self[0].len()).map(|colnum| v.columnp(colnum,self)).collect())
     }
 
@@ -121,7 +121,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     /// Result dimensions are self.len() x m[0].len() 
     fn matmult(self,m: &[Vec<U>]) -> Result<Vec<Vec<f64>>,RE> {
         if self[0].len() != m.len() { 
-            return re_error("DataError","matmult dimensions mismatch")?; }; 
+            return re_error("size","matmult dimensions mismatch")?; }; 
         Ok(self.par_iter().map(|srow| 
             (0..m[0].len()).map(|colnum| srow.columnp(colnum,m)).collect()
             ).collect::<Vec<Vec<f64>>>()) 
@@ -180,7 +180,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     /// both of which depend on the choice of axis.
      fn translate(self, m:&[U]) -> Result<Vec<Vec<f64>>,RE> { 
         if self[0].len() != m.len() { 
-            return re_error("DataError","translate dimensions mismatch")?; }; 
+            return re_error("size","translate dimensions mismatch")?; }; 
         self.vector_fn(|s| Ok(s.vsub(m)))   
     }
 
@@ -225,7 +225,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     /// Factors out the entropy of m to save repetition of work
     fn dependencies(self, m:&[U]) -> Result<Vec<f64>,RE> {  
         if self[0].len() != m.len() { 
-            return re_error("DataError","dependencies: dimensions mismatch")?; };
+            return re_error("size","dependencies: dimensions mismatch")?; };
         let entropym = m.entropy();
         return self.par_iter().map(|s| -> Result<f64,RE> {  
             Ok((entropym + s.entropy())/
@@ -235,7 +235,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     /// Individual distances from any point v, typically not a member, to all the members of self.    
     fn dists(self, v:&[U]) -> Result<Vec<f64>,RE> {
         if self[0].len() != v.len() { 
-            return re_error("DataError","dists dimensions mismatch")?; }
+            return re_error("size","dists dimensions mismatch")?; }
         self.scalar_fn(|p| Ok(p.vdist(v)))
     }
 
@@ -246,16 +246,16 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     /// The radius (distance) from gm is far more efficient, once gm has been found.
     fn distsum(self, v: &[U]) -> Result<f64,RE> {
         if self[0].len() != v.len() { 
-            return re_error("DataError","distsum dimensions mismatch")?; }
+            return re_error("size","distsum dimensions mismatch")?; }
         Ok(self.iter().map(|p| p.vdist(v)).sum::<f64>())
     }
 
     /// Sorted weighted radii to all member points from the Geometric Median.
     fn wsortedrads(self, ws: &[U], gm:&[f64]) -> Result<Vec<f64>,RE> {
         if self.len() != ws.len() { 
-            return Err(RError::DataError("wsortedrads self and ws lengths mismatch".to_owned())); };
+            return re_error("size","wsortedrads self and ws lengths mismatch")?; };
         if self[0].len() != gm.len() { 
-            return Err(RError::DataError("wsortedrads self and gm dimensions mismatch".to_owned())); };
+            return re_error("size","wsortedrads self and gm dimensions mismatch")?; };
         let wf = ws.iter().map(|x| x.clone().into()).collect::<Vec<f64>>();
         let wnorm = 1.0 / wf.iter().sum::<f64>(); 
         let mut res = self.iter().map(|s| wnorm*s.vdist::<f64>(gm))
@@ -267,7 +267,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     /// Weighted Geometric Median (gm) is the point that minimises the sum of distances to a given set of points.
     fn wgmedian(self, ws:&[U], eps: f64) -> Result<Vec<f64>,RE> { 
         if self.len() != ws.len() { 
-            return Err(RError::DataError("wgmedian and ws lengths mismatch".to_owned())); };
+            return re_error("size","wgmedian and ws lengths mismatch")?; };
         let mut g = self.wacentroid(ws); // start iterating from the weighted centre 
         let mut recsum = 0f64;
         loop { // vector iteration till accuracy eps is exceeded  
@@ -376,14 +376,67 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
         }
     }
 
+    /// Symmetric covariance matrix. Becomes comediance when supplied argument `mid`  
+    /// is the geometric median instead of the centroid.
+    /// Indexing is always in this order: (row,column) (left to right, top to bottom).
+    fn serial_covar(self, mid:&[U]) -> Result<TriangMat,RE> {
+        let d = self[0].len(); // dimension of the vector(s)
+        if d != mid.len() { 
+            return re_error("data","serial_covar self and mid dimensions mismatch")? }; 
+		let mut covsums = vec![0_f64; (d+1)*d/2];
+ 		for p in self { 
+            let mut covsub = 0_usize; // subscript into the flattened array cov
+            let zp = p.vsub(mid);     // zero mean/median vector
+            zp.iter().enumerate().for_each(|(i,thisc)| 
+                  // its products up to and including the diagonal 
+                  zp.iter().take(i+1).for_each(|otherc| { 
+                      covsums[covsub] += thisc*otherc;
+                      covsub += 1;
+                  }) )
+        };
+        // now compute the means and return
+        let lf = self.len() as f64;
+        for c in covsums.iter_mut() { *c /= lf }; 
+        Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed
+    }
+
+    /// Symmetric covariance matrix for weighted vectors.
+	 /// Becomes comediance when supplied argument `mid`  
+    /// is the geometric median instead of the centroid.
+    /// Indexing is always in this order: (row,column) (left to right, top to bottom).
+    fn serial_wcovar(self, ws:&[U], mid:&[U]) -> Result<TriangMat,RE> {
+        let d = self[0].len(); // dimension of the vector(s)
+        if d != mid.len() { 
+            return re_error("data","serial_wcovar self and mid dimensions mismatch")? };
+        if self.len() != ws.len() { 
+            return re_error("data","serial_wcovar self and ws lengths mismatch")? };  
+		let mut covsums = vec![0_f64; (d+1)*d/2];
+        let mut wsum = 0_f64; 
+ 		  for (p,w) in self.iter().zip(ws) { 
+            let mut covsub = 0_usize; // subscript into the flattened array cov
+            let zp = p.vsub(mid);     // zero mean vector
+            let wf:f64 = w.clone().into();
+            wsum += wf;
+            zp.iter().enumerate().for_each(|(i,thisc)| 
+                  // its products up to and including the diagonal 
+                  zp.iter().take(i+1).for_each(|otherc| { 
+                      covsums[covsub] += wf*thisc*otherc;
+                      covsub += 1;
+                  }) ) 
+        };
+        // now compute the means and return 
+        for c in covsums.iter_mut() { *c /= wsum }; 
+        Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed
+    }
+
     /// Symmetric covariance matrix. Becomes comediance when argument `mid`  
     /// is the geometric median instead of the centroid.
     /// The indexing is always in this order: (row,column) (left to right, top to bottom).
     /// The items are flattened into a single vector in this order.
     fn covar(self, mid:&[U]) -> Result<TriangMat,RE> {
         let d = self[0].len(); // dimension of the vector(s)
         if d != mid.len() { 
-            return Err(RError::DataError("covar self and mid dimensions mismatch".to_owned())); }; 
+            return re_error("data","covar self and mid dimensions mismatch")? }; 
         let mut covsum = self
             .par_iter()
             .fold(
@@ -423,9 +476,9 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     fn wcovar(self, ws:&[U], m:&[f64]) -> Result<TriangMat,RE> {
         let n = self[0].len(); // dimension of the vector(s)
         if n != m.len() { 
-            return Err(RError::DataError("wcovar self and m dimensions mismatch".to_owned())); }; 
+            return re_error("data","wcovar self and m dimensions mismatch")? }; 
         if self.len() != ws.len() { 
-            return Err(RError::DataError("wcovar self and ws lengths mismatch".to_owned())); }; 
+            return re_error("data","wcovar self and ws lengths mismatch")? }; 
         let (mut covsum,wsum) = self
             .par_iter().zip(ws)
             .fold(

diff --git a/tests/tests.rs b/tests/tests.rs
@@ -104,9 +104,13 @@ fn fstats() -> Result<(), RE> {
     println!("{YL}Testing on a random set of {n} points in {d} d space:{UN}");
     let pt = ranvv_f64(n,d)?;
     println!(
-        "Classical Covariances:\n{}",
+        "Classical Covariances (multithreading implementation):\n{}",
         pt.covar(&pt.acentroid())?.gr()
     );
+    println!(
+        "Classical Covariances (serial implementation):\n{}",
+        pt.serial_covar(&pt.acentroid())?.gr()
+    );
     println!(
         "Comediances (covariances of zero median data):\n{}",
         pt.covar(&pt.gmedian(EPS))?.gr()
@@ -252,8 +256,7 @@ fn triangmat() -> Result<(), RE> {
     println!("Cholesky L matrix:\n{chol}");
     println!("Eigenvalues by Cholesky decomposition:\n{}",
         chol.eigenvalues().gr());
-    println!("Determinant (their product): {}",chol.determinant().gr());
-    println!("Variances of the original data by summing principal components:\n{}",cov.variances()?.gr());
+    println!("Determinant (their product): {}",chol.determinant().gr());  
     let d = ranv_f64(d)?;
     let dmag = d.vmag();
     let mahamag = chol.mahalanobis(&d)?;
@@ -278,13 +281,13 @@ fn mat() -> Result<(), RE> {
     let m = ranvv_f64(n,d)?;
     println!("\nTest matrix M:\n{}", m.gr());
     let t = m.transpose();
-    println!("\nTransposed matrix T:\n{}", t.gr());
+    println!("\nTransposed matrix M':\n{}", t.gr());
     let v = ranv_f64(d)?;
     println!("\nVector V:\n{}", v.gr());
     println!("\nMV:\n{}", m.leftmultv(&v)?.gr());
-    println!("\nVT:\n{}", t.rightmultv(&v)?.gr());
-    println!("\nMT:\n{}", t.matmult(&m)?.gr());
-    println!("\nTM:\n{}", m.matmult(&t)?.gr());
+    println!("\nVM':\n{}", t.rightmultv(&v)?.gr());
+    println!("\nMM':\n{}", t.matmult(&m)?.gr());
+    println!("\nM'M:\n{}", m.matmult(&t)?.gr());
     Ok(())
 }