Skip to content

Commit

Permalink
2.0.11
Browse files Browse the repository at this point in the history
  • Loading branch information
liborty committed Apr 24, 2024
1 parent 9824988 commit 0d5ef58
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 35 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rstats"
version = "2.0.10"
version = "2.0.11"
authors = ["Libor Spacek"]
edition = "2021"
description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.."
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,9 @@ Methods which take an additional generic vector argument, such as a vector of we

## Appendix: Recent Releases

* **Version 2.0.10** - Added to struct TriangMat `eigenvectors` (enabling PCA) and `variances` which compute variances of the data cloud along the original axes, by projecting on them and summing the eigenvalue weighted principal components.
* **Version 2.0.11** - removed not so useful `variances`. Tidied up error processing in `vecveg.rs`. Added to it `serial_covar` and `serial_wcovar` for when heavy loading of all the cores may not be wanted.

* **Version 2.0.10** - Added to struct TriangMat `eigenvectors` (enabling PCA).

* **Version 2.0.9** - Pruned some rarely used methods, simplified `gmparts` and `gmerror`, updated dependencies.

Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,11 @@ pub trait VecVecg<T, U> {
/// Like `wgmedian` but returns also the sum of reciprocals.
fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec<f64>, f64), RE>;
/// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
fn serial_covar(self, mid:&[U]) -> Result<TriangMat,RE>;
/// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
fn covar(self, med: &[U]) -> Result<TriangMat, RE>;
/// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
fn serial_wcovar(self, ws: &[U], m: &[U]) -> Result<TriangMat, RE>;
/// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
fn wcovar(self, ws: &[U], m: &[f64]) -> Result<TriangMat, RE>;
}
12 changes: 1 addition & 11 deletions src/triangmat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,7 @@ impl TriangMat {
fullcov.iter_mut().for_each(|eigenvector| eigenvector.munit());
fullcov
}
/// Independent variances along the original axes (dimensions),
/// from triangular covariance matrix.
pub fn variances(&self) -> Result< Vec<f64>, RE > {
let eigenvalues = self.cholesky()?.eigenvalues();
let eigenvectors = self.eigenvectors();
let mut result = vec![0_f64;eigenvalues.len()];
eigenvectors.iter().zip(eigenvalues).for_each(
|(eigenvector, eigenvalue)| result.mutvadd(&eigenvector.smult(eigenvalue)));
Ok(result)
}


/// Translates subscripts to a 1d vector, i.e. natural numbers, to a pair of
/// (row,column) coordinates within a lower/upper triangular matrix.
/// Enables memory efficient representation of triangular matrices as one flat vector.
Expand Down
2 changes: 1 addition & 1 deletion src/vecvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ where
}

/// Likelihood of zero median point **p** belonging to zero median data cloud `self`,
/// based on the cloud's shape outside of normal plane through **p**.
/// based on the points outside of normal plane through **p**.
/// Returns the sum of unit vectors of its outside points, projected onto unit **p**.
/// Index should be in the descending order of magnitudes of self points (for efficiency).
fn depth(self, descending_index: &[usize], p: &[f64]) -> Result<f64,RE> {
Expand Down
81 changes: 67 additions & 14 deletions src/vecvecg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
fn wdvdt(self, ws: &[U], centre: &[f64]) -> Result<Vec<f64>, RE> {
let len = self.len();
if len < 2 {
return re_error("NoDataError","time series too short: {len}");
return re_error("empty","time series too short: {len}");
};
let mut weightsum:f64 = ws[0].clone().into();
let mut sumv:Vec<f64> = self[0].smult(weightsum);
Expand Down Expand Up @@ -104,14 +104,14 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
/// Rows of matrix self multiplying (column) vector v
fn leftmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
if self[0].len() != v.len() {
return re_error("DataError","leftmultv dimensions mismatch")?; };
return re_error("size","leftmultv dimensions mismatch")?; };
Ok(self.iter().map(|s| s.dotp(v)).collect())
}

/// Row vector v multipying columns of matrix self
fn rightmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
if v.len() != self.len() {
return re_error("DataError","rightmultv dimensions mismatch")?; };
return re_error("size","rightmultv dimensions mismatch")?; };
Ok((0..self[0].len()).map(|colnum| v.columnp(colnum,self)).collect())
}

Expand All @@ -121,7 +121,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
/// Result dimensions are self.len() x m[0].len()
fn matmult(self,m: &[Vec<U>]) -> Result<Vec<Vec<f64>>,RE> {
if self[0].len() != m.len() {
return re_error("DataError","matmult dimensions mismatch")?; };
return re_error("size","matmult dimensions mismatch")?; };
Ok(self.par_iter().map(|srow|
(0..m[0].len()).map(|colnum| srow.columnp(colnum,m)).collect()
).collect::<Vec<Vec<f64>>>())
Expand Down Expand Up @@ -180,7 +180,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
/// both of which depend on the choice of axis.
fn translate(self, m:&[U]) -> Result<Vec<Vec<f64>>,RE> {
if self[0].len() != m.len() {
return re_error("DataError","translate dimensions mismatch")?; };
return re_error("size","translate dimensions mismatch")?; };
self.vector_fn(|s| Ok(s.vsub(m)))
}

Expand Down Expand Up @@ -225,7 +225,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
/// Factors out the entropy of m to save repetition of work
fn dependencies(self, m:&[U]) -> Result<Vec<f64>,RE> {
if self[0].len() != m.len() {
return re_error("DataError","dependencies: dimensions mismatch")?; };
return re_error("size","dependencies: dimensions mismatch")?; };
let entropym = m.entropy();
return self.par_iter().map(|s| -> Result<f64,RE> {
Ok((entropym + s.entropy())/
Expand All @@ -235,7 +235,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
/// Individual distances from any point v, typically not a member, to all the members of self.
fn dists(self, v:&[U]) -> Result<Vec<f64>,RE> {
if self[0].len() != v.len() {
return re_error("DataError","dists dimensions mismatch")?; }
return re_error("size","dists dimensions mismatch")?; }
self.scalar_fn(|p| Ok(p.vdist(v)))
}

Expand All @@ -246,16 +246,16 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
/// The radius (distance) from gm is far more efficient, once gm has been found.
fn distsum(self, v: &[U]) -> Result<f64,RE> {
if self[0].len() != v.len() {
return re_error("DataError","distsum dimensions mismatch")?; }
return re_error("size","distsum dimensions mismatch")?; }
Ok(self.iter().map(|p| p.vdist(v)).sum::<f64>())
}

/// Sorted weighted radii to all member points from the Geometric Median.
fn wsortedrads(self, ws: &[U], gm:&[f64]) -> Result<Vec<f64>,RE> {
if self.len() != ws.len() {
return Err(RError::DataError("wsortedrads self and ws lengths mismatch".to_owned())); };
return re_error("size","wsortedrads self and ws lengths mismatch")?; };
if self[0].len() != gm.len() {
return Err(RError::DataError("wsortedrads self and gm dimensions mismatch".to_owned())); };
return re_error("size","wsortedrads self and gm dimensions mismatch")?; };
let wf = ws.iter().map(|x| x.clone().into()).collect::<Vec<f64>>();
let wnorm = 1.0 / wf.iter().sum::<f64>();
let mut res = self.iter().map(|s| wnorm*s.vdist::<f64>(gm))
Expand All @@ -267,7 +267,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
/// Weighted Geometric Median (gm) is the point that minimises the sum of distances to a given set of points.
fn wgmedian(self, ws:&[U], eps: f64) -> Result<Vec<f64>,RE> {
if self.len() != ws.len() {
return Err(RError::DataError("wgmedian and ws lengths mismatch".to_owned())); };
return re_error("size","wgmedian and ws lengths mismatch")?; };
let mut g = self.wacentroid(ws); // start iterating from the weighted centre
let mut recsum = 0f64;
loop { // vector iteration till accuracy eps is exceeded
Expand Down Expand Up @@ -376,14 +376,67 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
}
}

/// Symmetric covariance matrix. Becomes comediance when supplied argument `mid`
/// is the geometric median instead of the centroid.
/// Indexing is always in this order: (row,column) (left to right, top to bottom).
fn serial_covar(self, mid:&[U]) -> Result<TriangMat,RE> {
let d = self[0].len(); // dimension of the vector(s)
if d != mid.len() {
return re_error("data","serial_covar self and mid dimensions mismatch")? };
let mut covsums = vec![0_f64; (d+1)*d/2];
for p in self {
let mut covsub = 0_usize; // subscript into the flattened array cov
let zp = p.vsub(mid); // zero mean/median vector
zp.iter().enumerate().for_each(|(i,thisc)|
// its products up to and including the diagonal
zp.iter().take(i+1).for_each(|otherc| {
covsums[covsub] += thisc*otherc;
covsub += 1;
}) )
};
// now compute the means and return
let lf = self.len() as f64;
for c in covsums.iter_mut() { *c /= lf };
Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed
}

/// Symmetric covariance matrix for weighted vectors.
/// Becomes comediance when supplied argument `mid`
/// is the geometric median instead of the centroid.
/// Indexing is always in this order: (row,column) (left to right, top to bottom).
fn serial_wcovar(self, ws:&[U], mid:&[U]) -> Result<TriangMat,RE> {
let d = self[0].len(); // dimension of the vector(s)
if d != mid.len() {
return re_error("data","serial_wcovar self and mid dimensions mismatch")? };
if self.len() != ws.len() {
return re_error("data","serial_wcovar self and ws lengths mismatch")? };
let mut covsums = vec![0_f64; (d+1)*d/2];
let mut wsum = 0_f64;
for (p,w) in self.iter().zip(ws) {
let mut covsub = 0_usize; // subscript into the flattened array cov
let zp = p.vsub(mid); // zero mean vector
let wf:f64 = w.clone().into();
wsum += wf;
zp.iter().enumerate().for_each(|(i,thisc)|
// its products up to and including the diagonal
zp.iter().take(i+1).for_each(|otherc| {
covsums[covsub] += wf*thisc*otherc;
covsub += 1;
}) )
};
// now compute the means and return
for c in covsums.iter_mut() { *c /= wsum };
Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed
}

/// Symmetric covariance matrix. Becomes comediance when argument `mid`
/// is the geometric median instead of the centroid.
/// The indexing is always in this order: (row,column) (left to right, top to bottom).
/// The items are flattened into a single vector in this order.
fn covar(self, mid:&[U]) -> Result<TriangMat,RE> {
let d = self[0].len(); // dimension of the vector(s)
if d != mid.len() {
return Err(RError::DataError("covar self and mid dimensions mismatch".to_owned())); };
return re_error("data","covar self and mid dimensions mismatch")? };
let mut covsum = self
.par_iter()
.fold(
Expand Down Expand Up @@ -423,9 +476,9 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
fn wcovar(self, ws:&[U], m:&[f64]) -> Result<TriangMat,RE> {
let n = self[0].len(); // dimension of the vector(s)
if n != m.len() {
return Err(RError::DataError("wcovar self and m dimensions mismatch".to_owned())); };
return re_error("data","wcovar self and m dimensions mismatch")? };
if self.len() != ws.len() {
return Err(RError::DataError("wcovar self and ws lengths mismatch".to_owned())); };
return re_error("data","wcovar self and ws lengths mismatch")? };
let (mut covsum,wsum) = self
.par_iter().zip(ws)
.fold(
Expand Down
17 changes: 10 additions & 7 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,13 @@ fn fstats() -> Result<(), RE> {
println!("{YL}Testing on a random set of {n} points in {d} d space:{UN}");
let pt = ranvv_f64(n,d)?;
println!(
"Classical Covariances:\n{}",
"Classical Covariances (multithreading implementation):\n{}",
pt.covar(&pt.acentroid())?.gr()
);
println!(
"Classical Covariances (serial implementation):\n{}",
pt.serial_covar(&pt.acentroid())?.gr()
);
println!(
"Comediances (covariances of zero median data):\n{}",
pt.covar(&pt.gmedian(EPS))?.gr()
Expand Down Expand Up @@ -252,8 +256,7 @@ fn triangmat() -> Result<(), RE> {
println!("Cholesky L matrix:\n{chol}");
println!("Eigenvalues by Cholesky decomposition:\n{}",
chol.eigenvalues().gr());
println!("Determinant (their product): {}",chol.determinant().gr());
println!("Variances of the original data by summing principal components:\n{}",cov.variances()?.gr());
println!("Determinant (their product): {}",chol.determinant().gr());
let d = ranv_f64(d)?;
let dmag = d.vmag();
let mahamag = chol.mahalanobis(&d)?;
Expand All @@ -278,13 +281,13 @@ fn mat() -> Result<(), RE> {
let m = ranvv_f64(n,d)?;
println!("\nTest matrix M:\n{}", m.gr());
let t = m.transpose();
println!("\nTransposed matrix T:\n{}", t.gr());
println!("\nTransposed matrix M':\n{}", t.gr());
let v = ranv_f64(d)?;
println!("\nVector V:\n{}", v.gr());
println!("\nMV:\n{}", m.leftmultv(&v)?.gr());
println!("\nVT:\n{}", t.rightmultv(&v)?.gr());
println!("\nMT:\n{}", t.matmult(&m)?.gr());
println!("\nTM:\n{}", m.matmult(&t)?.gr());
println!("\nVM':\n{}", t.rightmultv(&v)?.gr());
println!("\nMM':\n{}", t.matmult(&m)?.gr());
println!("\nM'M:\n{}", m.matmult(&t)?.gr());
Ok(())
}

Expand Down

0 comments on commit 0d5ef58

Please sign in to comment.