Skip to content

Commit

Permalink
Fix RT prediction issues caused by 0's in covariance matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
lazear committed Mar 9, 2023
1 parent 0d64a47 commit 0070339
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 27 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/sage-cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sage-cli"
version = "0.9.3"
version = "0.9.4"
authors = ["Michael Lazear <[email protected]"]
edition = "2021"
rust-version = "1.62"
Expand Down
2 changes: 1 addition & 1 deletion crates/sage/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sage-core"
version = "0.9.3"
version = "0.9.4"
authors = ["Michael Lazear <[email protected]"]
edition = "2021"
rust-version = "1.62"
Expand Down
19 changes: 16 additions & 3 deletions crates/sage/src/ml/gauss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,33 @@ impl Matrix {
impl Gauss {
pub fn solve(left: Matrix, right: Matrix) -> Option<Matrix> {
let mut g = Gauss { left, right };
g.fill_zero();
g.echelon();
g.reduce();
g.backfill();

// let eye = Matrix::identity(g.left.rows);

// If `left` is the identity matrix, then `right` contains
// the solution to the system of equations
// match g.left.is_close(&eye, 0.00001) {
match g.left_solved() {
true => Some(g.right),
false => None,
}
}
/// This SO answer details how to handle covariance matrices with zeros on
/// diagonals, which can ruin solving
/// https://stackoverflow.com/a/35958102
/// "thus instead of using Sigma = Cov(X) you do Sigma = Cov(X) + eps * I,
/// where eps is prefedefined small constant, and I is identity matrix.
/// Consequently you never have a zero values on the diagonal,
/// and it is easy to prove that for reasonable epsilon, this will be inversible"
fn fill_zero(&mut self) {
for i in 0..self.left.cols {
if self.left[(i, i)] == 0.0 {
// I'm no mathematician, so hopefully this is a reasonable epsilon :)
self.left[(i, i)] = 1E-8;
}
}
}

// Is `left` an identity matrix, or else contains rows of all zeros?
fn left_solved(&self) -> bool {
Expand Down
33 changes: 13 additions & 20 deletions crates/sage/src/ml/linear_discriminant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ const FEATURE_NAMES: [&str; FEATURES] = [
"ln1p(delta_rt)",
];

struct Features<'a>(&'a [f64]);

impl<'a> std::fmt::Debug for Features<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_map()
.entries(FEATURE_NAMES.iter().zip(self.0))
.finish()
}
}

pub struct LinearDiscriminantAnalysis {
eigenvector: Vec<f64>,
}
Expand All @@ -60,24 +70,7 @@ impl LinearDiscriminantAnalysis {
.collect::<Vec<_>>();

let mut class_data = Matrix::new(class_data, count, features.cols);
let mut class_mean = class_data.mean();

// Any zeroes in the covariance matrix will cause LDA to fail:
// Attempt to rectify by making the matrices contain 1 instead
let ln2 = (2.0f64).ln();
for (idx, col) in class_mean.iter_mut().enumerate() {
if *col == 0.0 {
log::trace!(
"- attempting to apply correction to LDA feature `{}`, where class mean = 0.0", FEATURE_NAMES[idx]
);
*col = -1.0;
} else if (*col - ln2).abs() <= 1E-8 {
log::trace!(
"- attempting to apply correction to LDA feature `{}`, where class mean = ln(2)", FEATURE_NAMES[idx]
);
*col = -1.0;
}
}
let class_mean = class_data.mean();

for row in 0..class_data.rows {
for col in 0..class_data.cols {
Expand Down Expand Up @@ -114,7 +107,7 @@ impl LinearDiscriminantAnalysis {
evec.iter_mut().for_each(|c| *c *= -1.0);
}

log::trace!("- linear model fit with eigenvector: {:?}", evec);
log::trace!("- linear model fit with {:?}", Features(&evec));

Some(LinearDiscriminantAnalysis { eigenvector: evec })
}
Expand Down Expand Up @@ -146,7 +139,7 @@ pub fn score_psms(scores: &mut [Feature]) -> Option<()> {
perc.average_ppm as f64,
poisson,
(perc.matched_intensity_pct as f64).ln_1p(),
(perc.matched_peaks as f64).ln_1p(),
(perc.matched_peaks as f64),
(perc.longest_b as f64).ln_1p(),
(perc.longest_y as f64).ln_1p(),
(perc.longest_y as f64 / perc.peptide_len as f64),
Expand Down
8 changes: 8 additions & 0 deletions crates/sage/src/ml/matrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ impl Matrix {
matrix
}

pub fn diagonal(size: usize, value: f64) -> Matrix {
let mut matrix = Matrix::zeros(size, size);
for i in 0..size {
matrix[(i, i)] = value
}
matrix
}

/// Consume `self`, returning the underlying data in row-major order
pub fn take(self) -> Vec<f64> {
self.data
Expand Down

0 comments on commit 0070339

Please sign in to comment.