From 81d0f1e5c2e0afde5ed8b7e4b441f6d85c9ae888 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 13:55:58 +0100 Subject: [PATCH 01/28] added block coordinate descent function --- algorithms/linfa-elasticnet/src/algorithm.rs | 71 +++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 6eb540426..bbb1f8db9 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -1,5 +1,7 @@ use approx::{abs_diff_eq, abs_diff_ne}; -use ndarray::{s, Array1, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2}; +use ndarray::{ + s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2, +}; use ndarray_linalg::{Inverse, Lapack}; use linfa::traits::{Fit, PredictInplace}; @@ -187,6 +189,73 @@ fn coordinate_descent<'a, F: Float>( (w, gap, n_steps) } +fn block_coordinate_descent<'a, F: Float>( + x: ArrayView2<'a, F>, + y: ArrayView2<'a, F>, + tol: F, + max_steps: u32, + l1_ratio: F, + penalty: F, +) -> (Array2, F, u32) { + let n_samples = F::cast(x.shape()[0]); + let n_features = x.shape()[1]; + let n_tasks = y.shape()[1]; + // the parameters of the model + let mut w = Array2::::zeros((n_features, n_tasks)); + // the residuals: `Y - XW` (since W=0, this is just `Y` for now), + // the residuals are updated during the algorithm as the parameters change + let mut r = y.to_owned(); + let mut n_steps = 0u32; + let norm_cols_x = x.map_axis(Axis(0), |col| col.dot(&col)); + let mut gap = F::one() + tol; + let d_w_tol = tol; + let tol = tol * y.map(|yij| yij.powi(2)).sum(); + while n_steps < max_steps { + let mut w_max = F::zero(); + let mut d_w_max = F::zero(); + for j in 0..n_features { + if abs_diff_eq!(norm_cols_x[j], F::zero()) { + continue; + } + let old_w_j: ArrayView1 = w.slice(s![j, ..]); + let x_j: ArrayView1 = x.slice(s![.., j]); + let norm_old_w_j = old_w_j.map(|wjt| wjt.powi(2)).sum().sqrt(); + if abs_diff_ne!(norm_old_w_j, F::zero()) { + for i in 0..x.shape()[0] { + for t in 0..n_tasks { + r[[i, t]] += x_j[i] * old_w_j[t]; + } + } + } + let tmp = x_j.dot(&r); + // proximal step: TODO + let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt(); + if abs_diff_ne!(norm_w_j, F::zero()) { + for i in 0..x.shape()[0] { + for t in 0..n_tasks { + r[[i, t]] -= x_j[i] * w[[j, t]]; + } + } + } + let d_w_j = (norm_w_j - norm_old_w_j).abs(); + d_w_max = F::max(d_w_max, d_w_j); + w_max = F::max(w_max, norm_w_j); + } + n_steps += 1; + + if n_steps == max_steps - 1 || abs_diff_eq!(w_max, F::zero()) || d_w_max / w_max < d_w_tol { + // We've hit one potentiel stopping criteria + // check duality gap for ultimate stopping criterion + gap = duality_gap_mtl(x.view(), y.view(), w.view(), r.view(), l1_ratio, penalty); + if gap < tol { + break; + } + } + } + + (w, gap, n_steps) +} + fn duality_gap<'a, F: Float>( x: ArrayView2<'a, F>, y: ArrayView1<'a, F>, From 7e8408e06512dacae696db525e3cff19b34e1e02 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 16:17:53 +0100 Subject: [PATCH 02/28] added duality_gap_mtl computation --- algorithms/linfa-elasticnet/src/algorithm.rs | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index bbb1f8db9..111a669e7 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -1,4 +1,5 @@ use approx::{abs_diff_eq, abs_diff_ne}; +use ndarray::linalg::general_mat_mul; use ndarray::{ s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2, }; @@ -286,6 +287,45 @@ fn duality_gap<'a, F: Float>( gap } +fn duality_gap_mtl<'a, F: Float>( + x: ArrayView2<'a, F>, + y: ArrayView2<'a, F>, + w: ArrayView2<'a, F>, + r: ArrayView2<'a, F>, + l1_ratio: F, + penalty: F, +) -> F { + let half = F::cast(0.5); + let n_samples = F::cast(x.shape()[0]); + let n_features = x.shape()[1]; + let n_tasks = y.shape()[1]; + let l1_reg = l1_ratio * penalty * n_samples; + let l2_reg = (F::one() - l1_ratio) * penalty * n_samples; + let mut xta = Array2::::zeros((n_features, n_tasks)); + general_mat_mul(F::one(), &x.t(), &r, F::one(), &mut xta); + xta = xta - &w * l2_reg; + + let dual_norm_xta = xta + .map_axis(Axis(1), |x| x.dot(&x).sqrt()) + .fold(F::zero(), |max_norm, &nrm| max_norm.max(nrm)); + let r_norm2 = r.map(|rij| rij.powi(2)).sum(); + let w_norm2 = w.map(|wij| wij.powi(2)).sum(); + let (const_, mut gap) = if dual_norm_xta > l1_reg { + let const_ = l1_reg / dual_norm_xta; + let a_norm2 = r_norm2 * const_ * const_; + (const_, half * (r_norm2 + a_norm2)) + } else { + (F::one(), r_norm2) + }; + let mut rty = Array2::::zeros((n_tasks, n_tasks)); + general_mat_mul(F::one(), &r.t(), &y, F::one(), &mut rty); + let trace_rty = rty.diag().sum(); + let l21_norm = w.map_axis(Axis(1), |wj| (wj.dot(&wj)).sqrt()).sum(); + gap += l1_reg * l21_norm - const_ * trace_rty + + half * l2_reg * (F::one() + const_ * const_) * w_norm2; + gap +} + fn variance_params, D: Data>( ds: &DatasetBase, T>, y_est: Array1, From 1b69b8e19d620105db44868f1abbc95154e59a9f Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 16:23:24 +0100 Subject: [PATCH 03/28] ENH cd pass to be consistent with bcd --- algorithms/linfa-elasticnet/src/algorithm.rs | 33 +++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 111a669e7..36e480e05 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -156,25 +156,28 @@ fn coordinate_descent<'a, F: Float>( while n_steps < max_steps { let mut w_max = F::zero(); let mut d_w_max = F::zero(); - for ii in 0..n_features { - if abs_diff_eq!(norm_cols_x[ii], F::zero()) { + for j in 0..n_features { + if abs_diff_eq!(norm_cols_x[j], F::zero()) { continue; } - let w_ii = w[ii]; - let x_slc: ArrayView1 = x.slice(s![.., ii]); - if abs_diff_ne!(w_ii, F::zero()) { - // FIXME: direct addition with loop might be faster as it does not have to allocate - r += &(&x_slc * w_ii); + let old_w_j = w[j]; + let x_j: ArrayView1 = x.slice(s![.., j]); + if abs_diff_ne!(old_w_j, F::zero()) { + for i in 0..x.shape()[0] { + r[i] += x_j[i] * old_w_j; + } } - let tmp: F = x_slc.dot(&r); - w[ii] = tmp.signum() * F::max(tmp.abs() - n_samples * l1_ratio * penalty, F::zero()) - / (norm_cols_x[ii] + n_samples * (F::one() - l1_ratio) * penalty); - if abs_diff_ne!(w[ii], F::zero()) { - r -= &(&x_slc * w[ii]); + let tmp: F = x_j.dot(&r); + w[j] = tmp.signum() * F::max(tmp.abs() - n_samples * l1_ratio * penalty, F::zero()) + / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty); + if abs_diff_ne!(w[j], F::zero()) { + for i in 0..x.shape()[0] { + r[i] -= x_j[i] * old_w_j; + } } - let d_w_ii = (w[ii] - w_ii).abs(); - d_w_max = F::max(d_w_max, d_w_ii); - w_max = F::max(w_max, w[ii].abs()); + let d_w_j = (w[j] - old_w_j).abs(); + d_w_max = F::max(d_w_max, d_w_j); + w_max = F::max(w_max, w[j].abs()); } n_steps += 1; From e5b76e74853c92fbed7851c2f6c78895da729f9a Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 16:53:11 +0100 Subject: [PATCH 04/28] added prox operator for MTL Enet --- algorithms/linfa-elasticnet/src/algorithm.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 36e480e05..dbc7ba04d 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -232,7 +232,10 @@ fn block_coordinate_descent<'a, F: Float>( } } let tmp = x_j.dot(&r); - // proximal step: TODO + w.slice_mut(s![j, ..]).assign( + &(block_soft_thresholding(tmp.view(), n_samples * l1_ratio * penalty) + / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty)), + ); let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt(); if abs_diff_ne!(norm_w_j, F::zero()) { for i in 0..x.shape()[0] { @@ -260,6 +263,18 @@ fn block_coordinate_descent<'a, F: Float>( (w, gap, n_steps) } +fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> Array1 { + let norm_x = x.map(|&xi| xi.powi(2)).sum().sqrt(); + let mut _res = Array1::::zeros(x.len()); + if norm_x >= threshold { + let scal = F::one() - threshold / norm_x; + for i in 0..x.len() { + _res[i] = scal * x[i]; + } + } + _res +} + fn duality_gap<'a, F: Float>( x: ArrayView2<'a, F>, y: ArrayView1<'a, F>, From 196a2a54b6041279427ceb60204095f8bdb81e75 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 17:10:39 +0100 Subject: [PATCH 05/28] added helper functions for tests --- algorithms/linfa-elasticnet/src/algorithm.rs | 49 +++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index dbc7ba04d..84f9a7bae 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -386,9 +386,10 @@ pub fn compute_intercept( #[cfg(test)] mod tests { - use super::{coordinate_descent, ElasticNet}; + use super::{block_coordinate_descent, coordinate_descent, ElasticNet}; use approx::assert_abs_diff_eq; - use ndarray::{array, s, Array, Array1, Array2}; + use ndarray::linalg::general_mat_mul; + use ndarray::{array, s, Array, Array1, Array2, Axis}; use ndarray_rand::rand::SeedableRng; use ndarray_rand::rand_distr::Uniform; use ndarray_rand::RandomExt; @@ -411,6 +412,17 @@ mod tests { squared_error(x, y, intercept, beta) + lambda * elastic_net_penalty(beta, alpha) } + fn elastic_net_multi_task_objective( + x: &Array2, + y: &Array2, + intercept: &Array1, + beta: &Array2, + alpha: f64, + lambda: f64, + ) -> f64 { + squared_error_mtl(x, y, intercept, beta) + lambda * elastic_net_mtl_penalty(beta, alpha) + } + fn squared_error(x: &Array2, y: &Array1, intercept: f64, beta: &Array1) -> f64 { let mut resid = -x.dot(beta); resid -= intercept; @@ -423,6 +435,22 @@ mod tests { result } + fn squared_error_mtl( + x: &Array2, + y: &Array2, + intercept: &Array1, + beta: &Array2, + ) -> f64 { + let mut resid = Array2::::zeros((x.shape()[0], y.shape()[1])); + general_mat_mul(1., &x, &beta, 1., &mut resid); + resid = &resid * -1.; + resid = resid - intercept; + resid = resid + y; + let mut datafit = resid.map(|r| r.powi(2)).sum(); + datafit /= 2.0 * y.len() as f64; + datafit + } + fn elastic_net_penalty(beta: &Array1, alpha: f64) -> f64 { let mut penalty = 0.0; for beta_j in beta { @@ -431,6 +459,14 @@ mod tests { penalty } + fn elastic_net_mtl_penalty(beta: &Array2, alpha: f64) -> f64 { + let frob_norm = beta.map(|beta_ij| beta_ij.powi(2)).sum(); + let l21_norm = beta + .map_axis(Axis(1), |beta_j| (beta_j.dot(&beta_j)).sqrt()) + .sum(); + (1.0 - alpha) / 2.0 * frob_norm + alpha * l21_norm + } + #[test] fn elastic_net_penalty_works() { let beta = array![-2.0, 1.0]; @@ -448,6 +484,9 @@ mod tests { assert_abs_diff_eq!(elastic_net_penalty(&beta2, 0.0), 0.0); } + #[test] + fn elastic_net_mtl_penalty_works() {} + #[test] fn squared_error_works() { let x = array![[2.0, 1.0], [-1.0, 2.0]]; @@ -456,6 +495,9 @@ mod tests { assert_abs_diff_eq!(squared_error(&x, &y, 0.0, &beta), 0.25); } + #[test] + fn squared_error_mtl_works() {} + #[test] fn coordinate_descent_lowers_objective() { let x = array![[1.0, 0.0], [0.0, 1.0]]; @@ -470,6 +512,9 @@ mod tests { assert!(objective_start > objective_end); } + #[test] + fn block_coordinate_descent_lowers_objective() {} + #[test] fn lasso_zero_works() { let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.])); From dc91b627347a7ce99a67897eaed0e78ac7c8fc8a Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 17:17:01 +0100 Subject: [PATCH 06/28] fix failing CD tests --- algorithms/linfa-elasticnet/src/algorithm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 84f9a7bae..746952599 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -172,7 +172,7 @@ fn coordinate_descent<'a, F: Float>( / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty); if abs_diff_ne!(w[j], F::zero()) { for i in 0..x.shape()[0] { - r[i] -= x_j[i] * old_w_j; + r[i] -= x_j[i] * w[j]; } } let d_w_j = (w[j] - old_w_j).abs(); From 3bb53338e3a52c808fd17d8f44e7f269d750e7ea Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 20:01:38 +0100 Subject: [PATCH 07/28] working ent mtl penalties --- algorithms/linfa-elasticnet/src/algorithm.rs | 52 ++++++++++++++++++-- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 746952599..9a2d52246 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -444,10 +444,13 @@ mod tests { let mut resid = Array2::::zeros((x.shape()[0], y.shape()[1])); general_mat_mul(1., &x, &beta, 1., &mut resid); resid = &resid * -1.; - resid = resid - intercept; + for i in 0..resid.shape()[0] { + let _res = &resid.slice(s![i, ..]) - intercept; + resid.slice_mut(s![i, ..]).assign(&_res); + } resid = resid + y; let mut datafit = resid.map(|r| r.powi(2)).sum(); - datafit /= 2.0 * y.len() as f64; + datafit /= 2.0 * x.shape()[0] as f64; datafit } @@ -485,7 +488,29 @@ mod tests { } #[test] - fn elastic_net_mtl_penalty_works() {} + fn elastic_net_mtl_penalty_works() { + let beta = array![[-2.0, 1.0, 3.0], [3.0, 1.5, -1.7]]; + assert_abs_diff_eq!( + elastic_net_mtl_penalty(&beta, 0.7), + 9.472383565516601, + epsilon = 1e-12 + ); + assert_abs_diff_eq!( + elastic_net_mtl_penalty(&beta, 1.0), + 7.501976522166574, + epsilon = 1e-12 + ); + assert_abs_diff_eq!( + elastic_net_mtl_penalty(&beta, 0.2), + 12.756395304433315, + epsilon = 1e-12 + ); + + let beta2 = array![[0., 0.], [0., 0.], [0., 0.]]; + assert_abs_diff_eq!(elastic_net_mtl_penalty(&beta2, 0.8), 0.0); + assert_abs_diff_eq!(elastic_net_mtl_penalty(&beta2, 1.2), 0.0); + assert_abs_diff_eq!(elastic_net_mtl_penalty(&beta2, 0.8), 0.0); + } #[test] fn squared_error_works() { @@ -496,7 +521,24 @@ mod tests { } #[test] - fn squared_error_mtl_works() {} + fn squared_error_mtl_works() { + let x = array![[1.2, 2.3], [-1.3, 0.3], [-1.3, 0.1]]; + let y = array![ + [0.2, 1.0, 0.0, 1.], + [-0.3, 0.7, 0.1, 2.], + [-0.3, 0.7, 2.3, 3.] + ]; + let beta = array![[2.3, 4.5, 1.2, -3.4], [1.2, -3.4, 0.7, -1.2]]; + assert_abs_diff_eq!( + squared_error_mtl(&x, &y, &array![0., 0., 0., 0.], &beta), + 41.66298333333333 + ); + let intercept = array![1., 3., 2., 0.3]; + assert_abs_diff_eq!( + squared_error_mtl(&x, &y, &intercept, &beta), + 29.059983333333335 + ); + } #[test] fn coordinate_descent_lowers_objective() { @@ -696,7 +738,7 @@ mod tests { [-5.514554978810590376e-03,5.068011873981870252e-02,-1.590626280073640167e-02,-6.764228304218700139e-02,4.934129593323050011e-02,7.916527725369119917e-02,-2.867429443567860031e-02,3.430885887772629900e-02,-1.811826730789670159e-02,4.448547856271539702e-02], [4.170844488444359899e-02,5.068011873981870252e-02,-1.590626280073640167e-02,1.728186074811709910e-02,-3.734373413344069942e-02,-1.383981589779990050e-02,-2.499265663159149983e-02,-1.107951979964190078e-02,-4.687948284421659950e-02,1.549073015887240078e-02], [-4.547247794002570037e-02,-4.464163650698899782e-02,3.906215296718960200e-02,1.215130832538269907e-03,1.631842733640340160e-02,1.528299104862660025e-02,-2.867429443567860031e-02,2.655962349378539894e-02,4.452837402140529671e-02,-2.593033898947460017e-02], - [-4.547247794002570037e-02,-4.464163650698899782e-02,-7.303030271642410587e-02,-8.141376581713200000e-02,8.374011738825870577e-02,2.780892952020790065e-02,1.738157847891100005e-01,-3.949338287409189657e-02,-4.219859706946029777e-03,3.064409414368320182e-03] + [-4.547247794002570037e-02,-4.464163650698899782e-02,-7.303030271642410587e-02,-8.141376581713200000e-02,8.374011738825870577e-02,2.780892952020790065e-02,1.738157847891100005e-01,-3.949338287409189657e-02,-4.219859706946029777e-03,3.064409414368320182e-03] ]; #[rustfmt::skip] let y = array![2.33e+02, 9.1e+01, 1.11e+02, 1.52e+02, 1.2e+02, 6.70e+01, 3.1e+02, 9.4e+01, 1.83e+02, 6.6e+01, 1.73e+02, 7.2e+01, 4.9e+01, 6.4e+01, 4.8e+01, 1.78e+02, 1.04e+02, 1.32e+02, 2.20e+02, 5.7e+01]; From f9f9959b6021951f200bcc5dad43534d8162f9fc Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 20:09:42 +0100 Subject: [PATCH 08/28] bcd lower objective test pass --- algorithms/linfa-elasticnet/src/algorithm.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 9a2d52246..f1dfc7938 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -555,7 +555,20 @@ mod tests { } #[test] - fn block_coordinate_descent_lowers_objective() {} + fn block_coordinate_descent_lowers_objective() { + let x = array![[1.0, 0., -0.3, 3.2], [0.3, 1.2, -0.6, 1.2]]; + let y = array![[0.3, -1.2, 0.7], [1.4, -3.2, 0.2]]; + let beta = array![[0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.]]; + let intercept = array![0., 0., 0.]; + let alpha = 0.4; + let lambda = 0.002; + let objective_start = + elastic_net_multi_task_objective(&x, &y, &intercept, &beta, alpha, lambda); + let opt_result = block_coordinate_descent(x.view(), y.view(), 1e-4, 3, alpha, lambda); + let objective_end = + elastic_net_multi_task_objective(&x, &y, &intercept, &opt_result.0, alpha, lambda); + assert!(objective_start > objective_end); + } #[test] fn lasso_zero_works() { From e23d876930ca91f223bee38caeba667acea624e8 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 20:28:54 +0100 Subject: [PATCH 09/28] added MultiTaskEnet struct --- algorithms/linfa-elasticnet/src/lib.rs | 46 +++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs index 789f27c86..94d480cf6 100644 --- a/algorithms/linfa-elasticnet/src/lib.rs +++ b/algorithms/linfa-elasticnet/src/lib.rs @@ -1,7 +1,7 @@ #![doc = include_str!("../README.md")] use linfa::Float; -use ndarray::Array1; +use ndarray::{Array1, Array2}; #[cfg(feature = "serde")] use serde_crate::{Deserialize, Serialize}; @@ -65,3 +65,47 @@ impl ElasticNet { ElasticNetParams::new().l1_ratio(F::one()) } } + +/// MultiTask Elastic Net model +/// +/// This struct contains the parameters of a fitted multi-task elastic net model. This includes the +/// coefficients (a 2-dimensional array), (optionally) intercept (a 1-dimensional array), duality gaps +/// and the number of steps needed in the computation. +/// +/// ## Model implementation +/// +/// The block coordinate descent is widely used to solve generalized linear models optimization problems, +/// like Group Lasso, MultiTask Ridge or MultiTask Lasso. It cycles through a group of parameters and update +/// the groups separately, holding all the others fixed. The optimization routine stops when a criterion is +/// satisfied (dual sub-optimality gap or change in coefficients). +pub struct MultiTaskElasticNet { + hyperplane: Array2, + intercept: Array1, + duality_gap: F, + n_steps: u32, + variance: Result>, +} + +impl MultiTaskElasticNet { + /// Create a default parameter set for construction of MultiTaskElasticNet model + /// + /// By default, an intercept will be fitted. To disable fitting an + /// intercept, call `.with_intercept(false)` before calling `.fit()`. + /// + /// To additionally normalize the feature matrix before fitting, call + /// `fit_intercept_and_normalize()` before calling `fit()`. The feature + /// matrix will not be normalized by default. + pub fn params() -> ElasticNetParams { + ElasticNetParams::new() + } + + /// Create a multi-task ridge only model + pub fn ridge() -> ElasticNetParams { + ElasticNetParams::new().l1_ratio(F::zero()) + } + + /// Create a multi-task Lasso only model + pub fn lasso() -> ElasticNetParams { + ElasticNetParams::new().l1_ratio(F::one()) + } +} From f316c95527177ae99a992eb4a9649b521cdb828b Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 20:32:51 +0100 Subject: [PATCH 10/28] added MTENET documentation --- algorithms/linfa-elasticnet/src/hyperparams.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs index 8cb40c7d9..f8dc9a96d 100644 --- a/algorithms/linfa-elasticnet/src/hyperparams.rs +++ b/algorithms/linfa-elasticnet/src/hyperparams.rs @@ -54,6 +54,14 @@ impl ElasticNetValidParams { /// + 0.5 * penalty * (1 - l1_ratio) * ||w||^2_2 /// ``` /// +/// The multi-task version (Y becomes a measurement matrix) is also supported and +/// solves the following objective function: +/// ```ignore +/// 1 / (2 * n_samples) * || Y - XW ||^2_F +/// + penalty * l1_ratio * ||W||_2,1 +/// + 0.5 * penalty * (1 - l1_ratio) * ||W||^2_F +/// ``` +/// /// The parameter set can be verified into a /// [`ElasticNetValidParams`](crate::hyperparams::ElasticNetValidParams) by calling /// [ParamGuard::check](Self::check). It is also possible to directly fit a model with From 1ae652226e423903db95d0957acc91ac307ef8b0 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 20:39:44 +0100 Subject: [PATCH 11/28] added API MTENET --- algorithms/linfa-elasticnet/src/algorithm.rs | 33 +++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index f1dfc7938..67bcbdffe 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -11,7 +11,9 @@ use linfa::{ DatasetBase, Float, }; -use super::{hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, Result}; +use super::{ + hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, MultiTaskElasticNet, Result, +}; impl Fit, T, ElasticNetError> for ElasticNetValidParams where @@ -133,6 +135,35 @@ impl ElasticNet { } } +/// View the fitted parameters and make predictions with a fitted +/// elastic net model +impl MultiTaskElasticNet { + /// Get the fitted hyperplane + pub fn hyperplane(&self) -> &Array2 { + &self.hyperplane + } + + /// Get the fitted intercept, [0., ..., 0.] if no intercept was fitted + /// Note that there are as many intercepts as tasks + pub fn intercept(&self) -> &Array1 { + &self.intercept + } + + /// Get the number of steps taken in optimization algorithm + pub fn n_steps(&self) -> u32 { + self.n_steps + } + + /// Get the duality gap at the end of the optimization algorithm + pub fn duality_gap(&self) -> F { + self.duality_gap + } + + // TODO: Z-score + + // TODO: confidence level +} + fn coordinate_descent<'a, F: Float>( x: ArrayView2<'a, F>, y: ArrayView1<'a, F>, From f13a150e4b3e84a5a89c6dc68e13edc11cad2872 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 19 Jan 2022 22:02:23 +0100 Subject: [PATCH 12/28] added variance, z-score, conf interval for multitask ENET --- algorithms/linfa-elasticnet/src/algorithm.rs | 121 ++++++++++++++++++- src/dataset/impl_targets.rs | 14 ++- src/dataset/mod.rs | 8 ++ 3 files changed, 139 insertions(+), 4 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 67bcbdffe..acc9a20af 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -7,7 +7,7 @@ use ndarray_linalg::{Inverse, Lapack}; use linfa::traits::{Fit, PredictInplace}; use linfa::{ - dataset::{AsTargets, Records}, + dataset::{AsTargets, MultiTaskTarget, Records}, DatasetBase, Float, }; @@ -61,6 +61,64 @@ where } } +// impl Fit, T, ElasticNetError> for ElasticNetValidParams +// where +// F: Float + Lapack, +// D: Data, +// T: AsTargets, +// { +// type Object = MultiTaskElasticNet; + +// /// Fit a multi-task Elastic Net model given a feature matrix `x` and a target +// /// matrix `y`. +// /// +// /// The feature matrix `x` must have shape `(n_samples, n_features)` +// /// +// /// The target variable `y` must have shape `(n_samples, n_tasks)` +// /// +// /// Returns a `FittedMultiTaskElasticNet` object which contains the fitted +// /// parameters and can be used to `predict` values of the target variables +// /// for new feature values. +// fn fit(&self, dataset: &DatasetBase, T>) -> Result { +// let target = dataset.targets(); + +// let (intercept, y) = compute_intercept_multi_task(self.with_intercept(), target); +// let (hyperplane, duality_gap, n_steps) = block_coordinate_descent( +// dataset.records().view(), +// y.view(), +// self.tolerance(), +// self.max_iterations(), +// self.l1_ratio(), +// self.penalty(), +// ); + +// let mut y_est = Array2::::zeros((dataset.nsamples(), dataset.ntargets())); +// general_mat_mul( +// F::one(), +// &dataset.records(), +// &hyperplane, +// F::one(), +// &mut y_est, +// ); +// // Adding intercept +// for i in 0..dataset.n_samples() { +// let _res = &y_est.slice(s![i, ..]) + intercept; +// y_est.slice_mut(s![i, ..]).assign(&_res); +// } + +// // TODO: compute variance +// let variance = variance_params_multi_task(dataset, y_est); + +// Ok(MultiTaskElasticNet { +// hyperplane, +// intercept, +// duality_gap, +// n_steps, +// variance, +// }) +// } +// } + impl> PredictInplace, Array1> for ElasticNet { /// Given an input matrix `X`, with shape `(n_samples, n_features)`, /// `predict` returns the target variable according to elastic net @@ -159,9 +217,36 @@ impl MultiTaskElasticNet { self.duality_gap } - // TODO: Z-score + /// Calculate the Z score + pub fn z_score(&self) -> Result> { + self.variance + .as_ref() + .map(|variance| { + self.hyperplane + .iter() + .zip(variance.iter()) + .map(|(a, b)| *a / b.sqrt()) + .collect() + }) + .map_err(|err| err.clone()) + } + + /// Calculate the confidence level + pub fn confidence_95th(&self) -> Result> { + // the 95th percentile of our confidence level + let p = F::cast(1.645); - // TODO: confidence level + self.variance + .as_ref() + .map(|variance| { + self.hyperplane + .iter() + .zip(variance.iter()) + .map(|(a, b)| (*a - p * b.sqrt(), *a + p * b.sqrt())) + .collect() + }) + .map_err(|err| err.clone()) + } } fn coordinate_descent<'a, F: Float>( @@ -400,6 +485,36 @@ fn variance_params, D: Data> } } +fn variance_params_multi_task( + ds: &DatasetBase, T>, + y_est: Array2, +) -> Result> +where + F: Float + Lapack, + T: AsTargets + MultiTaskTarget, + D: Data, +{ + let nfeatures = ds.nfeatures(); + let nsamples = ds.nsamples(); + let ntasks = ds.targets().ntasks(); + + let target = ds.targets().as_multi_targets(); + + if nsamples < nfeatures + 1 { + return Err(ElasticNetError::NotEnoughSamples); + } + + let var_target = + (&target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures)); + + let inv_cov = ds.records().t().dot(ds.records()).inv(); + + match inv_cov { + Ok(inv_cov) => Ok(inv_cov.diag().mapv(|x| var_target * x)), + Err(_) => Err(ElasticNetError::IllConditioned), + } +} + /// Compute the intercept as the mean of `y` and center `y` if an intercept should /// be used, use `0.0` as intercept and leave `y` unchanged otherwise. pub fn compute_intercept( diff --git a/src/dataset/impl_targets.rs b/src/dataset/impl_targets.rs index a72170e46..6949acbd0 100644 --- a/src/dataset/impl_targets.rs +++ b/src/dataset/impl_targets.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use super::{ AsProbabilities, AsTargets, AsTargetsMut, CountedTargets, DatasetBase, FromTargetArray, Label, - Labels, Pr, Records, + Labels, MultiTaskTarget, Pr, Records, }; use ndarray::{ Array1, Array2, ArrayBase, ArrayView2, ArrayViewMut2, Axis, CowArray, Data, DataMut, Dimension, @@ -17,6 +17,18 @@ impl<'a, L, S: Data> AsTargets for ArrayBase { } } +impl<'a, L, S: Data> MultiTaskTarget for ArrayBase { + type Elem = L; + + fn nsamples(&self) -> usize { + self.shape()[0] + } + + fn ntasks(&self) -> usize { + self.shape()[1] + } +} + impl<'a, L: Clone + 'a, S: Data> FromTargetArray<'a, L> for ArrayBase { type Owned = ArrayBase, Ix2>; type View = ArrayBase, Ix2>; diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index dc19a3cc0..f21a45b95 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -198,6 +198,14 @@ pub trait Records: Sized { fn nfeatures(&self) -> usize; } +/// MultiTask trait +pub trait MultiTaskTarget: Sized { + type Elem; + + fn nsamples(&self) -> usize; + fn ntasks(&self) -> usize; +} + /// Return a reference to single or multiple target variables pub trait AsTargets { type Elem; From b3479634debfa677dfff173d88a23a9531f453e6 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sat, 22 Jan 2022 15:20:55 +0100 Subject: [PATCH 13/28] added multi-task estimators --- algorithms/linfa-elasticnet/src/algorithm.rs | 130 ++++++++++-------- .../linfa-elasticnet/src/hyperparams.rs | 30 ++++ algorithms/linfa-elasticnet/src/lib.rs | 2 +- 3 files changed, 103 insertions(+), 59 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index acc9a20af..c6b136d00 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -12,7 +12,8 @@ use linfa::{ }; use super::{ - hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, MultiTaskElasticNet, Result, + hyperparams::{ElasticNetValidParams, MultiTaskElasticNetValidParams}, + ElasticNet, ElasticNetError, MultiTaskElasticNet, Result, }; impl Fit, T, ElasticNetError> for ElasticNetValidParams @@ -61,63 +62,76 @@ where } } -// impl Fit, T, ElasticNetError> for ElasticNetValidParams -// where -// F: Float + Lapack, -// D: Data, -// T: AsTargets, -// { -// type Object = MultiTaskElasticNet; - -// /// Fit a multi-task Elastic Net model given a feature matrix `x` and a target -// /// matrix `y`. -// /// -// /// The feature matrix `x` must have shape `(n_samples, n_features)` -// /// -// /// The target variable `y` must have shape `(n_samples, n_tasks)` -// /// -// /// Returns a `FittedMultiTaskElasticNet` object which contains the fitted -// /// parameters and can be used to `predict` values of the target variables -// /// for new feature values. -// fn fit(&self, dataset: &DatasetBase, T>) -> Result { -// let target = dataset.targets(); - -// let (intercept, y) = compute_intercept_multi_task(self.with_intercept(), target); -// let (hyperplane, duality_gap, n_steps) = block_coordinate_descent( -// dataset.records().view(), -// y.view(), -// self.tolerance(), -// self.max_iterations(), -// self.l1_ratio(), -// self.penalty(), -// ); - -// let mut y_est = Array2::::zeros((dataset.nsamples(), dataset.ntargets())); -// general_mat_mul( -// F::one(), -// &dataset.records(), -// &hyperplane, -// F::one(), -// &mut y_est, -// ); -// // Adding intercept -// for i in 0..dataset.n_samples() { -// let _res = &y_est.slice(s![i, ..]) + intercept; -// y_est.slice_mut(s![i, ..]).assign(&_res); -// } - -// // TODO: compute variance -// let variance = variance_params_multi_task(dataset, y_est); - -// Ok(MultiTaskElasticNet { -// hyperplane, -// intercept, -// duality_gap, -// n_steps, -// variance, -// }) -// } -// } +impl Fit, ArrayBase, ElasticNetError> + for MultiTaskElasticNetValidParams +where + F: Float + Lapack, + D: Data, +{ + type Object = MultiTaskElasticNet; + + /// Fit a multi-task Elastic Net model given a feature matrix `x` and a target + /// matrix `y`. + /// + /// The feature matrix `x` must have shape `(n_samples, n_features)` + /// + /// The target variable `y` must have shape `(n_samples, n_tasks)` + /// + /// Returns a `FittedMultiTaskElasticNet` object which contains the fitted + /// parameters and can be used to `predict` values of the target variables + /// for new feature values. + fn fit( + &self, + dataset: &DatasetBase, ArrayBase>, + ) -> Result { + let target = dataset.targets(); + let nsamples = dataset.nsamples(); + let ntasks = target.ntasks(); + + let mut intercept = Array1::::zeros(ntasks); + let mut y = Array2::::zeros((nsamples, ntasks)); + + for t in 0..target.ntasks() { + let (intercept_t, y_t) = + compute_intercept(self.with_intercept(), target.slice(s![.., t])); + intercept[t] = intercept_t; + y.slice_mut(s![.., t]).assign(&y_t); + } + + let (hyperplane, duality_gap, n_steps) = block_coordinate_descent( + dataset.records().view(), + y.view(), + self.tolerance(), + self.max_iterations(), + self.l1_ratio(), + self.penalty(), + ); + + let mut y_est = Array2::::zeros((dataset.nsamples(), dataset.ntargets())); + general_mat_mul( + F::one(), + &dataset.records(), + &hyperplane, + F::one(), + &mut y_est, + ); + // Adding intercept + for i in 0..dataset.nsamples() { + let _res = &y_est.slice(s![i, ..]) + &intercept; + y_est.slice_mut(s![i, ..]).assign(&_res); + } + + let variance = variance_params_multi_task(dataset, y_est); + + Ok(MultiTaskElasticNet { + hyperplane, + intercept, + duality_gap, + n_steps, + variance, + }) + } +} impl> PredictInplace, Array1> for ElasticNet { /// Given an input matrix `X`, with shape `(n_samples, n_features)`, diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs index f8dc9a96d..661c0edf1 100644 --- a/algorithms/linfa-elasticnet/src/hyperparams.rs +++ b/algorithms/linfa-elasticnet/src/hyperparams.rs @@ -23,6 +23,14 @@ pub struct ElasticNetValidParams { tolerance: F, } +pub struct MultiTaskElasticNetValidParams { + penalty: F, + l1_ratio: F, + with_intercept: bool, + max_iterations: u32, + tolerance: F, +} + impl ElasticNetValidParams { pub fn penalty(&self) -> F { self.penalty @@ -45,6 +53,28 @@ impl ElasticNetValidParams { } } +impl MultiTaskElasticNetValidParams { + pub fn penalty(&self) -> F { + self.penalty + } + + pub fn l1_ratio(&self) -> F { + self.l1_ratio + } + + pub fn with_intercept(&self) -> bool { + self.with_intercept + } + + pub fn max_iterations(&self) -> u32 { + self.max_iterations + } + + pub fn tolerance(&self) -> F { + self.tolerance + } +} + /// A hyper-parameter set during construction /// /// Configures and minimizes the following objective function: diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs index 94d480cf6..04203000e 100644 --- a/algorithms/linfa-elasticnet/src/lib.rs +++ b/algorithms/linfa-elasticnet/src/lib.rs @@ -83,7 +83,7 @@ pub struct MultiTaskElasticNet { intercept: Array1, duality_gap: F, n_steps: u32, - variance: Result>, + variance: Result>, } impl MultiTaskElasticNet { From 80d9a023e5e86eaede3891c29f15633f283685c0 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sat, 22 Jan 2022 16:20:55 +0100 Subject: [PATCH 14/28] added tests for MTL --- algorithms/linfa-elasticnet/src/algorithm.rs | 103 +++++++++++++++++- .../linfa-elasticnet/src/hyperparams.rs | 73 +++++++++++++ algorithms/linfa-elasticnet/src/lib.rs | 25 ++--- 3 files changed, 185 insertions(+), 16 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index c6b136d00..2a45195e7 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -152,6 +152,28 @@ impl> PredictInplace, Array1> f } } +impl> PredictInplace, Array2> + for MultiTaskElasticNet +{ + /// Given an input matrix `X`, with shape `(n_samples, n_features)`, + /// `predict` returns the target variable according to elastic net + /// learned from the training data distribution. + fn predict_inplace(&self, x: &ArrayBase, y: &mut Array2) { + assert_eq!( + x.nrows(), + y.nrows(), + "The number of data points must match the number of output targets." + ); + + general_mat_mul(F::one(), &x, &self.hyperplane, F::one(), y); + // *y = *y + self.intercept; + } + + fn default_target(&self, x: &ArrayBase) -> Array2 { + Array2::zeros((x.nrows(), x.nrows())) + } +} + /// View the fitted parameters and make predictions with a fitted /// elastic net model impl ElasticNet { @@ -546,7 +568,7 @@ pub fn compute_intercept( #[cfg(test)] mod tests { - use super::{block_coordinate_descent, coordinate_descent, ElasticNet}; + use super::{block_coordinate_descent, coordinate_descent, ElasticNet, MultiTaskElasticNet}; use approx::assert_abs_diff_eq; use ndarray::linalg::general_mat_mul; use ndarray::{array, s, Array, Array1, Array2, Axis}; @@ -744,6 +766,20 @@ mod tests { assert_abs_diff_eq!(model.hyperplane(), &array![0.]); } + #[test] + fn mtl_lasso_zero_works() { + let dataset = Dataset::from((array![[0.], [0.], [0.]], array![[0.], [0.], [0.]])); + + let model = MultiTaskElasticNet::params() + .l1_ratio(1.0) + .penalty(0.1) + .fit(&dataset) + .unwrap(); + + assert_abs_diff_eq!(model.intercept(), &array![0.]); + assert_abs_diff_eq!(model.hyperplane(), &array![[0.]]); + } + #[test] fn lasso_toy_example_works() { // Test Lasso on a toy example for various values of alpha. @@ -778,6 +814,71 @@ mod tests { assert_abs_diff_eq!(model.duality_gap(), 0.0); } + #[test] + fn multitask_lasso_toy_example_works() { + // Test MultiTaskLasso on a toy example for various values of alpha. + // When validating this against sklearn notice that sklearn divides it + // against n_samples. + let dataset = Dataset::new( + array![[-1.0], [0.0], [1.0]], + array![[-1.0, 1.0], [0.0, -1.5], [1.0, 1.3]], + ); + + // input for prediction + let t = array![[2.0], [3.0], [4.0]]; + let model = MultiTaskElasticNet::lasso() + .penalty(1e-8) + .fit(&dataset) + .unwrap(); + assert_abs_diff_eq!(model.intercept(), &array![0., -1.5]); + assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [2.65]], epsilon = 1e-6); + assert_abs_diff_eq!( + model.predict(&t), + array![[0., 3.79999998], [0., 6.44999996], [0., 9.09999995]], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(model.duality_gap(), 0.0); + + let model = MultiTaskElasticNet::lasso() + .penalty(0.1) + .fit(&dataset) + .unwrap(); + assert_abs_diff_eq!(model.intercept(), &array![0., -1.4]); + assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [2.5]], epsilon = 1e-6); + assert_abs_diff_eq!( + model.predict(&t), + &array![[0., 3.6], [0., 6.1], [0., 8.6]], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(model.duality_gap(), 0.0); + + let model = MultiTaskElasticNet::lasso() + .penalty(0.5) + .fit(&dataset) + .unwrap(); + assert_abs_diff_eq!(model.intercept(), &array![0., -1.]); + assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [1.9]], epsilon = 1e-6); + assert_abs_diff_eq!( + model.predict(&t), + &array![[0., 2.8], [0., 4.7], [0., 6.6]], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(model.duality_gap(), 0.0); + + let model = MultiTaskElasticNet::lasso() + .penalty(1.0) + .fit(&dataset) + .unwrap(); + assert_abs_diff_eq!(model.intercept(), &array![0., -0.5]); + assert_abs_diff_eq!(model.hyperplane(), &array![[0.0], [1.15]], epsilon = 1e-6); + assert_abs_diff_eq!( + model.predict(&t), + &array![[0., 1.8], [0., 2.95], [0., 4.1]], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(model.duality_gap(), 0.0); + } + #[test] fn elastic_net_toy_example_works() { let dataset = Dataset::new(array![[-1.0], [0.0], [1.0]], array![-1.0, 0.0, 1.0]); diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs index 661c0edf1..1649ed843 100644 --- a/algorithms/linfa-elasticnet/src/hyperparams.rs +++ b/algorithms/linfa-elasticnet/src/hyperparams.rs @@ -143,6 +143,7 @@ impl MultiTaskElasticNetValidParams { /// # Ok::<(), ElasticNetError>(()) /// ``` pub struct ElasticNetParams(ElasticNetValidParams); +pub struct MultiTaskElasticNetParams(MultiTaskElasticNetValidParams); impl Default for ElasticNetParams { fn default() -> Self { @@ -150,6 +151,12 @@ impl Default for ElasticNetParams { } } +impl Default for MultiTaskElasticNetParams { + fn default() -> Self { + Self::new() + } +} + /// Configure and fit a Elastic Net model impl ElasticNetParams { /// Create default elastic net hyper parameters @@ -216,6 +223,43 @@ impl ElasticNetParams { } } +impl MultiTaskElasticNetParams { + pub fn new() -> MultiTaskElasticNetParams { + Self(MultiTaskElasticNetValidParams { + penalty: F::one(), + l1_ratio: F::cast(0.5), + with_intercept: true, + max_iterations: 1000, + tolerance: F::cast(1e-4), + }) + } + + pub fn penalty(mut self, penalty: F) -> Self { + self.0.penalty = penalty; + self + } + + pub fn l1_ratio(mut self, l1_ratio: F) -> Self { + self.0.l1_ratio = l1_ratio; + self + } + + pub fn with_intercept(mut self, with_intercept: bool) -> Self { + self.0.with_intercept = with_intercept; + self + } + + pub fn tolerance(mut self, tolerance: F) -> Self { + self.0.tolerance = tolerance; + self + } + + pub fn max_iterations(mut self, max_iterations: u32) -> Self { + self.0.max_iterations = max_iterations; + self + } +} + impl ParamGuard for ElasticNetParams { type Checked = ElasticNetValidParams; type Error = ElasticNetError; @@ -244,3 +288,32 @@ impl ParamGuard for ElasticNetParams { Ok(self.0) } } + +impl ParamGuard for MultiTaskElasticNetParams { + type Checked = MultiTaskElasticNetValidParams; + type Error = ElasticNetError; + + /// Validate the hyper parameters + fn check_ref(&self) -> Result<&Self::Checked> { + if self.0.penalty.is_negative() { + Err(ElasticNetError::InvalidPenalty( + self.0.penalty.to_f32().unwrap(), + )) + } else if !(F::zero()..=F::one()).contains(&self.0.l1_ratio) { + Err(ElasticNetError::InvalidL1Ratio( + self.0.l1_ratio.to_f32().unwrap(), + )) + } else if self.0.tolerance.is_negative() { + Err(ElasticNetError::InvalidTolerance( + self.0.tolerance.to_f32().unwrap(), + )) + } else { + Ok(&self.0) + } + } + + fn check(self) -> Result { + self.check_ref()?; + Ok(self.0) + } +} diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs index 04203000e..3ca803499 100644 --- a/algorithms/linfa-elasticnet/src/lib.rs +++ b/algorithms/linfa-elasticnet/src/lib.rs @@ -11,7 +11,10 @@ mod error; mod hyperparams; pub use error::{ElasticNetError, Result}; -pub use hyperparams::{ElasticNetParams, ElasticNetValidParams}; +pub use hyperparams::{ + ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams, + MultiTaskElasticNetValidParams, +}; #[cfg_attr( feature = "serde", @@ -87,25 +90,17 @@ pub struct MultiTaskElasticNet { } impl MultiTaskElasticNet { - /// Create a default parameter set for construction of MultiTaskElasticNet model - /// - /// By default, an intercept will be fitted. To disable fitting an - /// intercept, call `.with_intercept(false)` before calling `.fit()`. - /// - /// To additionally normalize the feature matrix before fitting, call - /// `fit_intercept_and_normalize()` before calling `fit()`. The feature - /// matrix will not be normalized by default. - pub fn params() -> ElasticNetParams { - ElasticNetParams::new() + pub fn params() -> MultiTaskElasticNetParams { + MultiTaskElasticNetParams::new() } /// Create a multi-task ridge only model - pub fn ridge() -> ElasticNetParams { - ElasticNetParams::new().l1_ratio(F::zero()) + pub fn ridge() -> MultiTaskElasticNetParams { + MultiTaskElasticNetParams::new().l1_ratio(F::zero()) } /// Create a multi-task Lasso only model - pub fn lasso() -> ElasticNetParams { - ElasticNetParams::new().l1_ratio(F::one()) + pub fn lasso() -> MultiTaskElasticNetParams { + MultiTaskElasticNetParams::new().l1_ratio(F::one()) } } From 3d981ef42a574d0d667d00373878d6e7356add38 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sun, 23 Jan 2022 19:08:42 +0100 Subject: [PATCH 15/28] pass comments --- algorithms/linfa-elasticnet/src/algorithm.rs | 54 +++++++------------ .../linfa-elasticnet/src/hyperparams.rs | 30 ----------- algorithms/linfa-elasticnet/src/lib.rs | 12 +++-- src/dataset/impl_targets.rs | 14 +---- src/dataset/mod.rs | 8 --- 5 files changed, 28 insertions(+), 90 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 2a45195e7..3d1a31e37 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -1,5 +1,4 @@ use approx::{abs_diff_eq, abs_diff_ne}; -use ndarray::linalg::general_mat_mul; use ndarray::{ s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2, }; @@ -7,7 +6,7 @@ use ndarray_linalg::{Inverse, Lapack}; use linfa::traits::{Fit, PredictInplace}; use linfa::{ - dataset::{AsTargets, MultiTaskTarget, Records}, + dataset::{AsTargets, Records}, DatasetBase, Float, }; @@ -16,11 +15,10 @@ use super::{ ElasticNet, ElasticNetError, MultiTaskElasticNet, Result, }; -impl Fit, T, ElasticNetError> for ElasticNetValidParams +impl Fit, ArrayBase, ElasticNetError> for ElasticNetValidParams where F: Float + Lapack, D: Data, - T: AsTargets, { type Object = ElasticNet; @@ -34,7 +32,10 @@ where /// Returns a `FittedElasticNet` object which contains the fitted /// parameters and can be used to `predict` values of the target variable /// for new feature values. - fn fit(&self, dataset: &DatasetBase, T>) -> Result { + fn fit( + &self, + dataset: &DatasetBase, ArrayBase>, + ) -> Result { let target = dataset.try_single_target()?; let (intercept, y) = compute_intercept(self.with_intercept(), target); @@ -62,8 +63,7 @@ where } } -impl Fit, ArrayBase, ElasticNetError> - for MultiTaskElasticNetValidParams +impl Fit, ArrayBase, ElasticNetError> for ElasticNetValidParams where F: Float + Lapack, D: Data, @@ -107,19 +107,9 @@ where self.penalty(), ); - let mut y_est = Array2::::zeros((dataset.nsamples(), dataset.ntargets())); - general_mat_mul( - F::one(), - &dataset.records(), - &hyperplane, - F::one(), - &mut y_est, - ); - // Adding intercept - for i in 0..dataset.nsamples() { - let _res = &y_est.slice(s![i, ..]) + &intercept; - y_est.slice_mut(s![i, ..]).assign(&_res); - } + let y_est = dataset.records().dot(&hyperplane); + + y_est = y_est + &intercept; let variance = variance_params_multi_task(dataset, y_est); @@ -165,11 +155,11 @@ impl> PredictInplace, Array2> "The number of data points must match the number of output targets." ); - general_mat_mul(F::one(), &x, &self.hyperplane, F::one(), y); - // *y = *y + self.intercept; + let y = x.dot(&self.hyperplane) + &self.intercept; } fn default_target(&self, x: &ArrayBase) -> Array2 { + // TODO: fix, should be (x.nrows(), y.ncols()) Array2::zeros((x.nrows(), x.nrows())) } } @@ -315,9 +305,10 @@ fn coordinate_descent<'a, F: Float>( let old_w_j = w[j]; let x_j: ArrayView1 = x.slice(s![.., j]); if abs_diff_ne!(old_w_j, F::zero()) { - for i in 0..x.shape()[0] { - r[i] += x_j[i] * old_w_j; - } + // for i in 0..x.shape()[0] { + // r[i] += x_j[i] * old_w_j; + // } + r.scaled_add(old_w_j, &x_j); } let tmp: F = x_j.dot(&r); w[j] = tmp.signum() * F::max(tmp.abs() - n_samples * l1_ratio * penalty, F::zero()) @@ -420,9 +411,7 @@ fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> let mut _res = Array1::::zeros(x.len()); if norm_x >= threshold { let scal = F::one() - threshold / norm_x; - for i in 0..x.len() { - _res[i] = scal * x[i]; - } + _res = x * scal; } _res } @@ -471,9 +460,7 @@ fn duality_gap_mtl<'a, F: Float>( let n_tasks = y.shape()[1]; let l1_reg = l1_ratio * penalty * n_samples; let l2_reg = (F::one() - l1_ratio) * penalty * n_samples; - let mut xta = Array2::::zeros((n_features, n_tasks)); - general_mat_mul(F::one(), &x.t(), &r, F::one(), &mut xta); - xta = xta - &w * l2_reg; + let xta = x.t().dot(&r) - &w * l2_reg; let dual_norm_xta = xta .map_axis(Axis(1), |x| x.dot(&x).sqrt()) @@ -488,7 +475,7 @@ fn duality_gap_mtl<'a, F: Float>( (F::one(), r_norm2) }; let mut rty = Array2::::zeros((n_tasks, n_tasks)); - general_mat_mul(F::one(), &r.t(), &y, F::one(), &mut rty); + let rty = r.t().dot(&y); let trace_rty = rty.diag().sum(); let l21_norm = w.map_axis(Axis(1), |wj| (wj.dot(&wj)).sqrt()).sum(); gap += l1_reg * l21_norm - const_ * trace_rty @@ -570,7 +557,6 @@ pub fn compute_intercept( mod tests { use super::{block_coordinate_descent, coordinate_descent, ElasticNet, MultiTaskElasticNet}; use approx::assert_abs_diff_eq; - use ndarray::linalg::general_mat_mul; use ndarray::{array, s, Array, Array1, Array2, Axis}; use ndarray_rand::rand::SeedableRng; use ndarray_rand::rand_distr::Uniform; @@ -624,7 +610,7 @@ mod tests { beta: &Array2, ) -> f64 { let mut resid = Array2::::zeros((x.shape()[0], y.shape()[1])); - general_mat_mul(1., &x, &beta, 1., &mut resid); + let resid = x.dot(&beta); resid = &resid * -1.; for i in 0..resid.shape()[0] { let _res = &resid.slice(s![i, ..]) - intercept; diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs index 1649ed843..236fc596e 100644 --- a/algorithms/linfa-elasticnet/src/hyperparams.rs +++ b/algorithms/linfa-elasticnet/src/hyperparams.rs @@ -23,14 +23,6 @@ pub struct ElasticNetValidParams { tolerance: F, } -pub struct MultiTaskElasticNetValidParams { - penalty: F, - l1_ratio: F, - with_intercept: bool, - max_iterations: u32, - tolerance: F, -} - impl ElasticNetValidParams { pub fn penalty(&self) -> F { self.penalty @@ -53,28 +45,6 @@ impl ElasticNetValidParams { } } -impl MultiTaskElasticNetValidParams { - pub fn penalty(&self) -> F { - self.penalty - } - - pub fn l1_ratio(&self) -> F { - self.l1_ratio - } - - pub fn with_intercept(&self) -> bool { - self.with_intercept - } - - pub fn max_iterations(&self) -> u32 { - self.max_iterations - } - - pub fn tolerance(&self) -> F { - self.tolerance - } -} - /// A hyper-parameter set during construction /// /// Configures and minimizes the following objective function: diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs index 3ca803499..d2865093a 100644 --- a/algorithms/linfa-elasticnet/src/lib.rs +++ b/algorithms/linfa-elasticnet/src/lib.rs @@ -11,14 +11,11 @@ mod error; mod hyperparams; pub use error::{ElasticNetError, Result}; -pub use hyperparams::{ - ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams, - MultiTaskElasticNetValidParams, -}; +pub use hyperparams::{ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams}; #[cfg_attr( feature = "serde", - derive(Serialize, Deserialize), + derive(Serialize, Deserialize, Debug, Clone, PartialEq), serde(crate = "serde_crate") )] /// Elastic Net model @@ -69,6 +66,11 @@ impl ElasticNet { } } +#[cfg_attr( + feature = "serde", + derive(Serialize, Deserialize, Debug, Clone, PartialEq), + serde(crate = "serde_crate") +)] /// MultiTask Elastic Net model /// /// This struct contains the parameters of a fitted multi-task elastic net model. This includes the diff --git a/src/dataset/impl_targets.rs b/src/dataset/impl_targets.rs index 6949acbd0..a72170e46 100644 --- a/src/dataset/impl_targets.rs +++ b/src/dataset/impl_targets.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use super::{ AsProbabilities, AsTargets, AsTargetsMut, CountedTargets, DatasetBase, FromTargetArray, Label, - Labels, MultiTaskTarget, Pr, Records, + Labels, Pr, Records, }; use ndarray::{ Array1, Array2, ArrayBase, ArrayView2, ArrayViewMut2, Axis, CowArray, Data, DataMut, Dimension, @@ -17,18 +17,6 @@ impl<'a, L, S: Data> AsTargets for ArrayBase { } } -impl<'a, L, S: Data> MultiTaskTarget for ArrayBase { - type Elem = L; - - fn nsamples(&self) -> usize { - self.shape()[0] - } - - fn ntasks(&self) -> usize { - self.shape()[1] - } -} - impl<'a, L: Clone + 'a, S: Data> FromTargetArray<'a, L> for ArrayBase { type Owned = ArrayBase, Ix2>; type View = ArrayBase, Ix2>; diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index f21a45b95..dc19a3cc0 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -198,14 +198,6 @@ pub trait Records: Sized { fn nfeatures(&self) -> usize; } -/// MultiTask trait -pub trait MultiTaskTarget: Sized { - type Elem; - - fn nsamples(&self) -> usize; - fn ntasks(&self) -> usize; -} - /// Return a reference to single or multiple target variables pub trait AsTargets { type Elem; From 36f4b70545dceefeb50131fe2b2128ca452ae35b Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sun, 23 Jan 2022 19:57:26 +0100 Subject: [PATCH 16/28] CLN files --- algorithms/linfa-elasticnet/src/algorithm.rs | 52 ++----------- .../linfa-elasticnet/src/hyperparams.rs | 73 ------------------- algorithms/linfa-elasticnet/src/lib.rs | 14 ++-- 3 files changed, 15 insertions(+), 124 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 3d1a31e37..868415cf5 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -11,8 +11,7 @@ use linfa::{ }; use super::{ - hyperparams::{ElasticNetValidParams, MultiTaskElasticNetValidParams}, - ElasticNet, ElasticNetError, MultiTaskElasticNet, Result, + hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, MultiTaskElasticNet, Result, }; impl Fit, ArrayBase, ElasticNetError> for ElasticNetValidParams @@ -86,12 +85,12 @@ where ) -> Result { let target = dataset.targets(); let nsamples = dataset.nsamples(); - let ntasks = target.ntasks(); + let ntasks = dataset.targets().ncols(); let mut intercept = Array1::::zeros(ntasks); let mut y = Array2::::zeros((nsamples, ntasks)); - for t in 0..target.ntasks() { + for t in 0..ntasks { let (intercept_t, y_t) = compute_intercept(self.with_intercept(), target.slice(s![.., t])); intercept[t] = intercept_t; @@ -107,11 +106,9 @@ where self.penalty(), ); - let y_est = dataset.records().dot(&hyperplane); - - y_est = y_est + &intercept; - - let variance = variance_params_multi_task(dataset, y_est); + // let y_est = dataset.records().dot(&hyperplane) + &intercept; + // let variance = variance_params_multi_task(dataset, y_est); + let variance = Ok(Array1::::zeros(dataset.nfeatures())); Ok(MultiTaskElasticNet { hyperplane, @@ -411,7 +408,7 @@ fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> let mut _res = Array1::::zeros(x.len()); if norm_x >= threshold { let scal = F::one() - threshold / norm_x; - _res = x * scal; + _res = &x * scal; } _res } @@ -456,8 +453,6 @@ fn duality_gap_mtl<'a, F: Float>( ) -> F { let half = F::cast(0.5); let n_samples = F::cast(x.shape()[0]); - let n_features = x.shape()[1]; - let n_tasks = y.shape()[1]; let l1_reg = l1_ratio * penalty * n_samples; let l2_reg = (F::one() - l1_ratio) * penalty * n_samples; let xta = x.t().dot(&r) - &w * l2_reg; @@ -474,7 +469,6 @@ fn duality_gap_mtl<'a, F: Float>( } else { (F::one(), r_norm2) }; - let mut rty = Array2::::zeros((n_tasks, n_tasks)); let rty = r.t().dot(&y); let trace_rty = rty.diag().sum(); let l21_norm = w.map_axis(Axis(1), |wj| (wj.dot(&wj)).sqrt()).sum(); @@ -508,36 +502,6 @@ fn variance_params, D: Data> } } -fn variance_params_multi_task( - ds: &DatasetBase, T>, - y_est: Array2, -) -> Result> -where - F: Float + Lapack, - T: AsTargets + MultiTaskTarget, - D: Data, -{ - let nfeatures = ds.nfeatures(); - let nsamples = ds.nsamples(); - let ntasks = ds.targets().ntasks(); - - let target = ds.targets().as_multi_targets(); - - if nsamples < nfeatures + 1 { - return Err(ElasticNetError::NotEnoughSamples); - } - - let var_target = - (&target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures)); - - let inv_cov = ds.records().t().dot(ds.records()).inv(); - - match inv_cov { - Ok(inv_cov) => Ok(inv_cov.diag().mapv(|x| var_target * x)), - Err(_) => Err(ElasticNetError::IllConditioned), - } -} - /// Compute the intercept as the mean of `y` and center `y` if an intercept should /// be used, use `0.0` as intercept and leave `y` unchanged otherwise. pub fn compute_intercept( @@ -610,7 +574,7 @@ mod tests { beta: &Array2, ) -> f64 { let mut resid = Array2::::zeros((x.shape()[0], y.shape()[1])); - let resid = x.dot(&beta); + let resid = x.dot(beta); resid = &resid * -1.; for i in 0..resid.shape()[0] { let _res = &resid.slice(s![i, ..]) - intercept; diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs index 236fc596e..f8dc9a96d 100644 --- a/algorithms/linfa-elasticnet/src/hyperparams.rs +++ b/algorithms/linfa-elasticnet/src/hyperparams.rs @@ -113,7 +113,6 @@ impl ElasticNetValidParams { /// # Ok::<(), ElasticNetError>(()) /// ``` pub struct ElasticNetParams(ElasticNetValidParams); -pub struct MultiTaskElasticNetParams(MultiTaskElasticNetValidParams); impl Default for ElasticNetParams { fn default() -> Self { @@ -121,12 +120,6 @@ impl Default for ElasticNetParams { } } -impl Default for MultiTaskElasticNetParams { - fn default() -> Self { - Self::new() - } -} - /// Configure and fit a Elastic Net model impl ElasticNetParams { /// Create default elastic net hyper parameters @@ -193,43 +186,6 @@ impl ElasticNetParams { } } -impl MultiTaskElasticNetParams { - pub fn new() -> MultiTaskElasticNetParams { - Self(MultiTaskElasticNetValidParams { - penalty: F::one(), - l1_ratio: F::cast(0.5), - with_intercept: true, - max_iterations: 1000, - tolerance: F::cast(1e-4), - }) - } - - pub fn penalty(mut self, penalty: F) -> Self { - self.0.penalty = penalty; - self - } - - pub fn l1_ratio(mut self, l1_ratio: F) -> Self { - self.0.l1_ratio = l1_ratio; - self - } - - pub fn with_intercept(mut self, with_intercept: bool) -> Self { - self.0.with_intercept = with_intercept; - self - } - - pub fn tolerance(mut self, tolerance: F) -> Self { - self.0.tolerance = tolerance; - self - } - - pub fn max_iterations(mut self, max_iterations: u32) -> Self { - self.0.max_iterations = max_iterations; - self - } -} - impl ParamGuard for ElasticNetParams { type Checked = ElasticNetValidParams; type Error = ElasticNetError; @@ -258,32 +214,3 @@ impl ParamGuard for ElasticNetParams { Ok(self.0) } } - -impl ParamGuard for MultiTaskElasticNetParams { - type Checked = MultiTaskElasticNetValidParams; - type Error = ElasticNetError; - - /// Validate the hyper parameters - fn check_ref(&self) -> Result<&Self::Checked> { - if self.0.penalty.is_negative() { - Err(ElasticNetError::InvalidPenalty( - self.0.penalty.to_f32().unwrap(), - )) - } else if !(F::zero()..=F::one()).contains(&self.0.l1_ratio) { - Err(ElasticNetError::InvalidL1Ratio( - self.0.l1_ratio.to_f32().unwrap(), - )) - } else if self.0.tolerance.is_negative() { - Err(ElasticNetError::InvalidTolerance( - self.0.tolerance.to_f32().unwrap(), - )) - } else { - Ok(&self.0) - } - } - - fn check(self) -> Result { - self.check_ref()?; - Ok(self.0) - } -} diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs index d2865093a..1ecf13de0 100644 --- a/algorithms/linfa-elasticnet/src/lib.rs +++ b/algorithms/linfa-elasticnet/src/lib.rs @@ -11,7 +11,7 @@ mod error; mod hyperparams; pub use error::{ElasticNetError, Result}; -pub use hyperparams::{ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams}; +pub use hyperparams::{ElasticNetParams, ElasticNetValidParams}; #[cfg_attr( feature = "serde", @@ -92,17 +92,17 @@ pub struct MultiTaskElasticNet { } impl MultiTaskElasticNet { - pub fn params() -> MultiTaskElasticNetParams { - MultiTaskElasticNetParams::new() + pub fn params() -> ElasticNetParams { + ElasticNetParams::new() } /// Create a multi-task ridge only model - pub fn ridge() -> MultiTaskElasticNetParams { - MultiTaskElasticNetParams::new().l1_ratio(F::zero()) + pub fn ridge() -> ElasticNetParams { + ElasticNetParams::new().l1_ratio(F::zero()) } /// Create a multi-task Lasso only model - pub fn lasso() -> MultiTaskElasticNetParams { - MultiTaskElasticNetParams::new().l1_ratio(F::one()) + pub fn lasso() -> ElasticNetParams { + ElasticNetParams::new().l1_ratio(F::one()) } } From 15b280b718e81e995ee02bbdf78557e77e3304ef Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sun, 23 Jan 2022 21:39:46 +0100 Subject: [PATCH 17/28] cleaner implementation --- algorithms/linfa-elasticnet/src/algorithm.rs | 30 +++++++++---------- .../linfa-elasticnet/src/hyperparams.rs | 26 ++++++++++------ algorithms/linfa-elasticnet/src/lib.rs | 14 ++++----- 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 868415cf5..84eadbbdd 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -11,13 +11,15 @@ use linfa::{ }; use super::{ - hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, MultiTaskElasticNet, Result, + hyperparams::{ElasticNetValidParams, MultiTaskElasticNetValidParams}, + ElasticNet, ElasticNetError, MultiTaskElasticNet, Result, }; -impl Fit, ArrayBase, ElasticNetError> for ElasticNetValidParams +impl Fit, T, ElasticNetError> for ElasticNetValidParams where F: Float + Lapack, D: Data, + T: AsTargets, { type Object = ElasticNet; @@ -31,10 +33,7 @@ where /// Returns a `FittedElasticNet` object which contains the fitted /// parameters and can be used to `predict` values of the target variable /// for new feature values. - fn fit( - &self, - dataset: &DatasetBase, ArrayBase>, - ) -> Result { + fn fit(&self, dataset: &DatasetBase, T>) -> Result { let target = dataset.try_single_target()?; let (intercept, y) = compute_intercept(self.with_intercept(), target); @@ -62,7 +61,8 @@ where } } -impl Fit, ArrayBase, ElasticNetError> for ElasticNetValidParams +impl Fit, ArrayBase, ElasticNetError> + for MultiTaskElasticNetValidParams where F: Float + Lapack, D: Data, @@ -152,7 +152,7 @@ impl> PredictInplace, Array2> "The number of data points must match the number of output targets." ); - let y = x.dot(&self.hyperplane) + &self.intercept; + *y = x.dot(&self.hyperplane) + &self.intercept; } fn default_target(&self, x: &ArrayBase) -> Array2 { @@ -573,14 +573,9 @@ mod tests { intercept: &Array1, beta: &Array2, ) -> f64 { - let mut resid = Array2::::zeros((x.shape()[0], y.shape()[1])); - let resid = x.dot(beta); + let mut resid = x.dot(beta); resid = &resid * -1.; - for i in 0..resid.shape()[0] { - let _res = &resid.slice(s![i, ..]) - intercept; - resid.slice_mut(s![i, ..]).assign(&_res); - } - resid = resid + y; + resid = &resid - intercept + y; let mut datafit = resid.map(|r| r.powi(2)).sum(); datafit /= 2.0 * x.shape()[0] as f64; datafit @@ -704,7 +699,10 @@ mod tests { #[test] fn lasso_zero_works() { - let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.])); + // let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.])); + let x = array![[0.], [0.], [0.]]; + let y = array![0., 0., 0.]; + let dataset = Dataset::from((x, y)); let model = ElasticNet::params() .l1_ratio(1.0) diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs index f8dc9a96d..abd0f8e3c 100644 --- a/algorithms/linfa-elasticnet/src/hyperparams.rs +++ b/algorithms/linfa-elasticnet/src/hyperparams.rs @@ -15,7 +15,7 @@ use super::Result; /// A verified hyper-parameter set ready for the estimation of a ElasticNet regression model /// /// See [`ElasticNetParams`](crate::ElasticNetParams) for more informations. -pub struct ElasticNetValidParams { +pub struct ElasticNetValidParamsBase { penalty: F, l1_ratio: F, with_intercept: bool, @@ -23,7 +23,10 @@ pub struct ElasticNetValidParams { tolerance: F, } -impl ElasticNetValidParams { +pub type ElasticNetValidParams = ElasticNetValidParamsBase; +pub type MultiTaskElasticNetValidParams = ElasticNetValidParamsBase; + +impl ElasticNetValidParamsBase { pub fn penalty(&self) -> F { self.penalty } @@ -112,16 +115,21 @@ impl ElasticNetValidParams { /// let model = checked_params.fit(&ds)?; /// # Ok::<(), ElasticNetError>(()) /// ``` -pub struct ElasticNetParams(ElasticNetValidParams); +pub struct ElasticNetParamsBase( + ElasticNetValidParamsBase, +); + +pub type ElasticNetParams = ElasticNetParamsBase; +pub type MultiTaskElasticNetParams = ElasticNetParamsBase; -impl Default for ElasticNetParams { +impl Default for ElasticNetParamsBase { fn default() -> Self { Self::new() } } /// Configure and fit a Elastic Net model -impl ElasticNetParams { +impl ElasticNetParamsBase { /// Create default elastic net hyper parameters /// /// By default, an intercept will be fitted. To disable fitting an @@ -130,8 +138,8 @@ impl ElasticNetParams { /// To additionally normalize the feature matrix before fitting, call /// `fit_intercept_and_normalize()` before calling `fit()`. The feature /// matrix will not be normalized by default. - pub fn new() -> ElasticNetParams { - Self(ElasticNetValidParams { + pub fn new() -> ElasticNetParamsBase { + Self(ElasticNetValidParamsBase { penalty: F::one(), l1_ratio: F::cast(0.5), with_intercept: true, @@ -186,8 +194,8 @@ impl ElasticNetParams { } } -impl ParamGuard for ElasticNetParams { - type Checked = ElasticNetValidParams; +impl ParamGuard for ElasticNetParamsBase { + type Checked = ElasticNetValidParamsBase; type Error = ElasticNetError; /// Validate the hyper parameters diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs index 1ecf13de0..d2865093a 100644 --- a/algorithms/linfa-elasticnet/src/lib.rs +++ b/algorithms/linfa-elasticnet/src/lib.rs @@ -11,7 +11,7 @@ mod error; mod hyperparams; pub use error::{ElasticNetError, Result}; -pub use hyperparams::{ElasticNetParams, ElasticNetValidParams}; +pub use hyperparams::{ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams}; #[cfg_attr( feature = "serde", @@ -92,17 +92,17 @@ pub struct MultiTaskElasticNet { } impl MultiTaskElasticNet { - pub fn params() -> ElasticNetParams { - ElasticNetParams::new() + pub fn params() -> MultiTaskElasticNetParams { + MultiTaskElasticNetParams::new() } /// Create a multi-task ridge only model - pub fn ridge() -> ElasticNetParams { - ElasticNetParams::new().l1_ratio(F::zero()) + pub fn ridge() -> MultiTaskElasticNetParams { + MultiTaskElasticNetParams::new().l1_ratio(F::zero()) } /// Create a multi-task Lasso only model - pub fn lasso() -> ElasticNetParams { - ElasticNetParams::new().l1_ratio(F::one()) + pub fn lasso() -> MultiTaskElasticNetParams { + MultiTaskElasticNetParams::new().l1_ratio(F::one()) } } From d81a5cdac265673314cfb225cb27c928ed84261e Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sun, 23 Jan 2022 21:47:15 +0100 Subject: [PATCH 18/28] cln tests --- algorithms/linfa-elasticnet/src/algorithm.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 84eadbbdd..b7c1c4470 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -699,10 +699,7 @@ mod tests { #[test] fn lasso_zero_works() { - // let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.])); - let x = array![[0.], [0.], [0.]]; - let y = array![0., 0., 0.]; - let dataset = Dataset::from((x, y)); + let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.])); let model = ElasticNet::params() .l1_ratio(1.0) From a21bc1a2c9c74edc410036783bee7726842c1ecc Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sat, 19 Feb 2022 19:45:42 +0100 Subject: [PATCH 19/28] changed map into fold --- algorithms/linfa-elasticnet/src/algorithm.rs | 38 ++++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index b7c1c4470..c09586a82 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -108,6 +108,7 @@ where // let y_est = dataset.records().dot(&hyperplane) + &intercept; // let variance = variance_params_multi_task(dataset, y_est); + // TODO: Compute variance let variance = Ok(Array1::::zeros(dataset.nfeatures())); Ok(MultiTaskElasticNet { @@ -302,18 +303,13 @@ fn coordinate_descent<'a, F: Float>( let old_w_j = w[j]; let x_j: ArrayView1 = x.slice(s![.., j]); if abs_diff_ne!(old_w_j, F::zero()) { - // for i in 0..x.shape()[0] { - // r[i] += x_j[i] * old_w_j; - // } r.scaled_add(old_w_j, &x_j); } let tmp: F = x_j.dot(&r); w[j] = tmp.signum() * F::max(tmp.abs() - n_samples * l1_ratio * penalty, F::zero()) / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty); if abs_diff_ne!(w[j], F::zero()) { - for i in 0..x.shape()[0] { - r[i] -= x_j[i] * w[j]; - } + r.scaled_add(-w[j], &x_j); } let d_w_j = (w[j] - old_w_j).abs(); d_w_max = F::max(d_w_max, d_w_j); @@ -353,7 +349,7 @@ fn block_coordinate_descent<'a, F: Float>( let norm_cols_x = x.map_axis(Axis(0), |col| col.dot(&col)); let mut gap = F::one() + tol; let d_w_tol = tol; - let tol = tol * y.map(|yij| yij.powi(2)).sum(); + let tol = tol * y.fold(F::zero(), |sum, &y_ij| sum + y_ij.powi(2)); while n_steps < max_steps { let mut w_max = F::zero(); let mut d_w_max = F::zero(); @@ -363,7 +359,9 @@ fn block_coordinate_descent<'a, F: Float>( } let old_w_j: ArrayView1 = w.slice(s![j, ..]); let x_j: ArrayView1 = x.slice(s![.., j]); - let norm_old_w_j = old_w_j.map(|wjt| wjt.powi(2)).sum().sqrt(); + let norm_old_w_j = old_w_j + .fold(F::zero(), |sum, &wjt| sum + wjt.powi(2)) + .sqrt(); if abs_diff_ne!(norm_old_w_j, F::zero()) { for i in 0..x.shape()[0] { for t in 0..n_tasks { @@ -376,7 +374,10 @@ fn block_coordinate_descent<'a, F: Float>( &(block_soft_thresholding(tmp.view(), n_samples * l1_ratio * penalty) / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty)), ); - let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt(); + let norm_w_j = w + .slice(s![j, ..]) + .fold(F::zero(), |sum, &wjt| sum + wjt.powi(2)) + .sqrt(); if abs_diff_ne!(norm_w_j, F::zero()) { for i in 0..x.shape()[0] { for t in 0..n_tasks { @@ -404,13 +405,12 @@ fn block_coordinate_descent<'a, F: Float>( } fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> Array1 { - let norm_x = x.map(|&xi| xi.powi(2)).sum().sqrt(); - let mut _res = Array1::::zeros(x.len()); - if norm_x >= threshold { - let scal = F::one() - threshold / norm_x; - _res = &x * scal; + let norm_x = x.fold(F::zero(), |sum, &x_i| sum + x_i * x_i).sqrt(); + if norm_x < threshold { + return Array1::::zeros(x.len()); } - _res + let scale = F::one() - threshold / norm_x; + &x * scale } fn duality_gap<'a, F: Float>( @@ -460,8 +460,8 @@ fn duality_gap_mtl<'a, F: Float>( let dual_norm_xta = xta .map_axis(Axis(1), |x| x.dot(&x).sqrt()) .fold(F::zero(), |max_norm, &nrm| max_norm.max(nrm)); - let r_norm2 = r.map(|rij| rij.powi(2)).sum(); - let w_norm2 = w.map(|wij| wij.powi(2)).sum(); + let r_norm2 = r.fold(F::zero(), |sum, &rij| sum + rij.powi(2)); + let w_norm2 = w.fold(F::zero(), |sum, &wij| sum + wij.powi(2)); let (const_, mut gap) = if dual_norm_xta > l1_reg { let const_ = l1_reg / dual_norm_xta; let a_norm2 = r_norm2 * const_ * const_; @@ -576,7 +576,7 @@ mod tests { let mut resid = x.dot(beta); resid = &resid * -1.; resid = &resid - intercept + y; - let mut datafit = resid.map(|r| r.powi(2)).sum(); + let mut datafit = resid.fold(0., |sum, &r| sum + r.powi(2)); datafit /= 2.0 * x.shape()[0] as f64; datafit } @@ -590,7 +590,7 @@ mod tests { } fn elastic_net_mtl_penalty(beta: &Array2, alpha: f64) -> f64 { - let frob_norm = beta.map(|beta_ij| beta_ij.powi(2)).sum(); + let frob_norm = beta.fold(0., |sum, &beta_ij| sum + beta_ij.powi(2)); let l21_norm = beta .map_axis(Axis(1), |beta_j| (beta_j.dot(&beta_j)).sqrt()) .sum(); From 7f32afc59fae705dd9979ec9dc76997af7e4534f Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sat, 19 Feb 2022 20:54:14 +0100 Subject: [PATCH 20/28] added tests for Enet and MTL --- algorithms/linfa-elasticnet/src/algorithm.rs | 114 ++++++++++++++++++- 1 file changed, 110 insertions(+), 4 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index c09586a82..97d8f476e 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -769,6 +769,30 @@ mod tests { array![[-1.0, 1.0], [0.0, -1.5], [1.0, 1.3]], ); + // no intercept fitting + let t = array![[2.0], [3.0], [4.0]]; + let model = MultiTaskElasticNet::lasso() + .with_intercept(false) + .penalty(0.01) + .fit(&dataset) + .unwrap(); + assert_abs_diff_eq!(model.intercept(), &array![0., 0.]); + assert_abs_diff_eq!( + model.hyperplane(), + &array![[0.9851659, 0.1477748]], + epsilon = 1e-6 + ); + assert_abs_diff_eq!( + model.predict(&t), + array![ + [1.9703319, 0.2955497], + [2.9554978, 0.4433246], + [3.9406638, 0.5910995] + ], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(model.duality_gap(), 0.0); + // input for prediction let t = array![[2.0], [3.0], [4.0]]; let model = MultiTaskElasticNet::lasso() @@ -776,7 +800,7 @@ mod tests { .fit(&dataset) .unwrap(); assert_abs_diff_eq!(model.intercept(), &array![0., -1.5]); - assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [2.65]], epsilon = 1e-6); + assert_abs_diff_eq!(model.hyperplane(), &array![[0., 2.65]], epsilon = 1e-6); assert_abs_diff_eq!( model.predict(&t), array![[0., 3.79999998], [0., 6.44999996], [0., 9.09999995]], @@ -789,7 +813,7 @@ mod tests { .fit(&dataset) .unwrap(); assert_abs_diff_eq!(model.intercept(), &array![0., -1.4]); - assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [2.5]], epsilon = 1e-6); + assert_abs_diff_eq!(model.hyperplane(), &array![[0., 2.5]], epsilon = 1e-6); assert_abs_diff_eq!( model.predict(&t), &array![[0., 3.6], [0., 6.1], [0., 8.6]], @@ -802,7 +826,7 @@ mod tests { .fit(&dataset) .unwrap(); assert_abs_diff_eq!(model.intercept(), &array![0., -1.]); - assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [1.9]], epsilon = 1e-6); + assert_abs_diff_eq!(model.hyperplane(), &array![[0., 1.9]], epsilon = 1e-6); assert_abs_diff_eq!( model.predict(&t), &array![[0., 2.8], [0., 4.7], [0., 6.6]], @@ -815,7 +839,7 @@ mod tests { .fit(&dataset) .unwrap(); assert_abs_diff_eq!(model.intercept(), &array![0., -0.5]); - assert_abs_diff_eq!(model.hyperplane(), &array![[0.0], [1.15]], epsilon = 1e-6); + assert_abs_diff_eq!(model.hyperplane(), &array![[0.0, 1.15]], epsilon = 1e-6); assert_abs_diff_eq!( model.predict(&t), &array![[0., 1.8], [0., 2.95], [0., 4.1]], @@ -861,6 +885,88 @@ mod tests { assert_abs_diff_eq!(model.duality_gap(), 0.0); } + #[test] + fn multitask_elasticnet_toy_example_works() { + // Test MultiTaskElasticNet on a toy example for various values of alpha + // and l1_ratio. When validating this against sklearn notice that sklearn + // divides it against n_samples. + let dataset = Dataset::new( + array![[-1.0], [0.0], [1.0]], + array![[-1.0, 1.0], [0.0, -1.5], [1.0, 1.3]], + ); + + // no intercept fitting + let t = array![[2.0], [3.0], [4.0]]; + let model = MultiTaskElasticNet::params() + .with_intercept(false) + .l1_ratio(0.3) + .penalty(0.1) + .fit(&dataset) + .unwrap(); + assert_abs_diff_eq!(model.intercept(), &array![0., 0.]); + assert_abs_diff_eq!( + model.hyperplane(), + &array![[0.86470395, 0.12970559]], + epsilon = 1e-6 + ); + assert_abs_diff_eq!( + model.predict(&t), + array![ + [1.7294079, 0.25941118], + [2.59411185, 0.38911678], + [3.4588158, 0.51882237] + ], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(model.duality_gap(), 0.0, epsilon = 1e-12); + + // input for prediction + let t = array![[2.0], [3.0], [4.0]]; + let model = MultiTaskElasticNet::params() + .l1_ratio(0.3) + .penalty(0.1) + .fit(&dataset) + .unwrap(); + assert_abs_diff_eq!(model.intercept(), &array![0., 0.26666666], epsilon = 1e-6); + assert_abs_diff_eq!( + model.hyperplane(), + &array![[0.86470395, 0.12970559]], + epsilon = 1e-6 + ); + assert_abs_diff_eq!( + model.predict(&t), + array![ + [1.7294079, 0.52607785], + [2.59411185, 0.65578344], + [3.4588158, 0.78548904] + ], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(model.duality_gap(), 0.0, epsilon = 1e-12); + + let model = MultiTaskElasticNet::params() + .l1_ratio(0.5) + .penalty(0.1) + .fit(&dataset) + .unwrap(); + assert_abs_diff_eq!(model.intercept(), &array![0., 0.2666666], epsilon = 1e-6); + assert_abs_diff_eq!( + model.hyperplane(), + &array![[0.861237, 0.12918555]], + epsilon = 1e-6 + ); + assert_abs_diff_eq!( + model.predict(&t), + &array![ + [1.722474, 0.52503777], + [2.583711, 0.65422332], + [3.44494799, 0.78340887] + ], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(model.duality_gap(), 0.0, epsilon = 1e-12); + } + #[test] fn elastic_net_2d_toy_example_works() { let dataset = Dataset::new(array![[1.0, 0.0], [0.0, 1.0]], array![3.0, 2.0]); From 6c56e06764309237ae3673fb9a6ff1a00b48462a Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sat, 19 Feb 2022 22:18:31 +0100 Subject: [PATCH 21/28] added incorrect target shape --- algorithms/linfa-elasticnet/src/error.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/algorithms/linfa-elasticnet/src/error.rs b/algorithms/linfa-elasticnet/src/error.rs index 9b44f258c..30fbe566b 100644 --- a/algorithms/linfa-elasticnet/src/error.rs +++ b/algorithms/linfa-elasticnet/src/error.rs @@ -25,6 +25,8 @@ pub enum ElasticNetError { InvalidPenalty(f32), #[error("invalid tolerance {0}")] InvalidTolerance(f32), + #[error("the target can either be a vector (ndim=1) or a matrix (ndim=2)")] + IncorrectTargetShape, #[error(transparent)] BaseCrate(#[from] linfa::Error), } From a07fb39a100b0437d188865d5af0fc0a3b8882b2 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sat, 19 Feb 2022 22:19:01 +0100 Subject: [PATCH 22/28] WIP: made variance params generic over the number of tasks --- algorithms/linfa-elasticnet/src/algorithm.rs | 65 ++++++++------------ 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 97d8f476e..57ae31379 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -1,6 +1,6 @@ use approx::{abs_diff_eq, abs_diff_ne}; use ndarray::{ - s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2, + s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Dimension, Ix1, Ix2, }; use ndarray_linalg::{Inverse, Lapack}; @@ -106,10 +106,8 @@ where self.penalty(), ); - // let y_est = dataset.records().dot(&hyperplane) + &intercept; - // let variance = variance_params_multi_task(dataset, y_est); - // TODO: Compute variance - let variance = Ok(Array1::::zeros(dataset.nfeatures())); + let y_est = dataset.records().dot(&hyperplane) + &intercept; + let variance = variance_params(dataset, y_est); Ok(MultiTaskElasticNet { hyperplane, @@ -242,34 +240,13 @@ impl MultiTaskElasticNet { } /// Calculate the Z score - pub fn z_score(&self) -> Result> { - self.variance - .as_ref() - .map(|variance| { - self.hyperplane - .iter() - .zip(variance.iter()) - .map(|(a, b)| *a / b.sqrt()) - .collect() - }) - .map_err(|err| err.clone()) + pub fn z_score(&self) -> Result> { + // TODO } /// Calculate the confidence level - pub fn confidence_95th(&self) -> Result> { - // the 95th percentile of our confidence level - let p = F::cast(1.645); - - self.variance - .as_ref() - .map(|variance| { - self.hyperplane - .iter() - .zip(variance.iter()) - .map(|(a, b)| (*a - p * b.sqrt(), *a + p * b.sqrt())) - .collect() - }) - .map_err(|err| err.clone()) + pub fn confidence_95th(&self) -> Result> { + // TODO } } @@ -364,9 +341,7 @@ fn block_coordinate_descent<'a, F: Float>( .sqrt(); if abs_diff_ne!(norm_old_w_j, F::zero()) { for i in 0..x.shape()[0] { - for t in 0..n_tasks { - r[[i, t]] += x_j[i] * old_w_j[t]; - } + r.slice_mut(s![i, ..]).scaled_add(x_j[i], &old_w_j); } } let tmp = x_j.dot(&r); @@ -477,22 +452,34 @@ fn duality_gap_mtl<'a, F: Float>( gap } -fn variance_params, D: Data>( - ds: &DatasetBase, T>, - y_est: Array1, +fn variance_params, D: Data, I: Dimension>( + ds: &DatasetBase, ArrayBase>, + y_est: ArrayBase, ) -> Result> { let nfeatures = ds.nfeatures(); let nsamples = ds.nsamples(); - // try to convert targets into a single target - let target = ds.try_single_target()?; + let target = ds.targets(); + let ndim = target.ndim(); + + let ntasks: usize = match ndim { + 1 => 1, + 2 => *target.shape().last().unwrap(), + _ => 0, + }; + + // check that the dimension of the target is either a vector or a matrix + if ntasks == 0 { + return Err(ElasticNetError::IncorrectTargetShape); + } // check that we have enough samples if nsamples < nfeatures + 1 { return Err(ElasticNetError::NotEnoughSamples); } - let var_target = (&target - &y_est).mapv(|x| x * x).sum() / F::cast(nsamples - nfeatures); + let var_target = + (target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures)); let inv_cov = ds.records().t().dot(ds.records()).inv(); From 75023d957846f8017218b13e14e792793d535a81 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sun, 20 Feb 2022 13:27:04 +0100 Subject: [PATCH 23/28] added z_score and confidence_95th for MTL --- algorithms/linfa-elasticnet/src/algorithm.rs | 29 ++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 57ae31379..328c7768e 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -241,12 +241,37 @@ impl MultiTaskElasticNet { /// Calculate the Z score pub fn z_score(&self) -> Result> { - // TODO + self.variance + .as_ref() + .map(|variance| { + self.hyperplane + .axis_iter(Axis(0)) + .zip(variance.iter()) + .map(|(a, b)| a.to_owned() / b.sqrt()) + .collect() + }) + .map_err(|err| err.clone()) } /// Calculate the confidence level pub fn confidence_95th(&self) -> Result> { - // TODO + // the 95th percentile of our confidence level + let p = F::cast(1.645); + + self.variance + .as_ref() + .map(|variance| { + self.hyperplane + .axis_iter(Axis(1)) + .map(|coef| { + coef.iter() + .zip(variance.iter()) + .map(|(a, b)| (*a - p * b.sqrt(), *a + p * b.sqrt())) + .collect() + }) + .collect() + }) + .map_err(|err| err.clone()) } } From ba3a574802bd79c18057784e22801e61dbc0c277 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sun, 20 Feb 2022 14:35:21 +0100 Subject: [PATCH 24/28] map instead of fold --- algorithms/linfa-elasticnet/src/algorithm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 328c7768e..9b4fc563b 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -351,7 +351,7 @@ fn block_coordinate_descent<'a, F: Float>( let norm_cols_x = x.map_axis(Axis(0), |col| col.dot(&col)); let mut gap = F::one() + tol; let d_w_tol = tol; - let tol = tol * y.fold(F::zero(), |sum, &y_ij| sum + y_ij.powi(2)); + let tol = tol * y.iter().map(|&y_ij| y_ij * y_ij).sum(); while n_steps < max_steps { let mut w_max = F::zero(); let mut d_w_max = F::zero(); From 114fc038ff829f6975fd615c25af26bd9e4702b4 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sun, 20 Feb 2022 21:12:00 +0100 Subject: [PATCH 25/28] fix confidence interval and z-score --- algorithms/linfa-elasticnet/src/algorithm.rs | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 9b4fc563b..dfd94ea30 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -244,11 +244,9 @@ impl MultiTaskElasticNet { self.variance .as_ref() .map(|variance| { - self.hyperplane - .axis_iter(Axis(0)) - .zip(variance.iter()) - .map(|(a, b)| a.to_owned() / b.sqrt()) - .collect() + ndarray::Zip::from(&self.hyperplane) + .and_broadcast(variance) + .map_collect(|a, b| *a / b.sqrt()) }) .map_err(|err| err.clone()) } @@ -261,15 +259,9 @@ impl MultiTaskElasticNet { self.variance .as_ref() .map(|variance| { - self.hyperplane - .axis_iter(Axis(1)) - .map(|coef| { - coef.iter() - .zip(variance.iter()) - .map(|(a, b)| (*a - p * b.sqrt(), *a + p * b.sqrt())) - .collect() - }) - .collect() + ndarray::Zip::from(&self.hyperplane) + .and_broadcast(variance) + .map_collect(|a, b| (*a - p * b.sqrt(), *a + p * b.sqrt())) }) .map_err(|err| err.clone()) } From 30744c0a037d8c904e07e8ac3251a7c24ba2937b Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Mon, 21 Feb 2022 11:09:25 +0100 Subject: [PATCH 26/28] converted back fold to map --- algorithms/linfa-elasticnet/src/algorithm.rs | 25 +++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index dfd94ea30..0961f831c 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -107,6 +107,8 @@ where ); let y_est = dataset.records().dot(&hyperplane) + &intercept; + + // try to calculate the variance let variance = variance_params(dataset, y_est); Ok(MultiTaskElasticNet { @@ -353,9 +355,7 @@ fn block_coordinate_descent<'a, F: Float>( } let old_w_j: ArrayView1 = w.slice(s![j, ..]); let x_j: ArrayView1 = x.slice(s![.., j]); - let norm_old_w_j = old_w_j - .fold(F::zero(), |sum, &wjt| sum + wjt.powi(2)) - .sqrt(); + let norm_old_w_j = old_w_j.map(|wjt| wjt.powi(2)).sum().sqrt(); if abs_diff_ne!(norm_old_w_j, F::zero()) { for i in 0..x.shape()[0] { r.slice_mut(s![i, ..]).scaled_add(x_j[i], &old_w_j); @@ -366,10 +366,7 @@ fn block_coordinate_descent<'a, F: Float>( &(block_soft_thresholding(tmp.view(), n_samples * l1_ratio * penalty) / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty)), ); - let norm_w_j = w - .slice(s![j, ..]) - .fold(F::zero(), |sum, &wjt| sum + wjt.powi(2)) - .sqrt(); + let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt(); if abs_diff_ne!(norm_w_j, F::zero()) { for i in 0..x.shape()[0] { for t in 0..n_tasks { @@ -384,7 +381,7 @@ fn block_coordinate_descent<'a, F: Float>( n_steps += 1; if n_steps == max_steps - 1 || abs_diff_eq!(w_max, F::zero()) || d_w_max / w_max < d_w_tol { - // We've hit one potentiel stopping criteria + // We've hit one potential stopping criteria // check duality gap for ultimate stopping criterion gap = duality_gap_mtl(x.view(), y.view(), w.view(), r.view(), l1_ratio, penalty); if gap < tol { @@ -397,7 +394,7 @@ fn block_coordinate_descent<'a, F: Float>( } fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> Array1 { - let norm_x = x.fold(F::zero(), |sum, &x_i| sum + x_i * x_i).sqrt(); + let norm_x = x.map(|x| x.powi(2)).sum().sqrt(); if norm_x < threshold { return Array1::::zeros(x.len()); } @@ -429,7 +426,7 @@ fn duality_gap<'a, F: Float>( } else { (F::one(), r_norm2) }; - let l1_norm = w.fold(F::zero(), |sum, w_i| sum + w_i.abs()); + let l1_norm = w.map(|w_i| w_i.abs()).sum(); gap += l1_reg * l1_norm - const_ * r.dot(&y) + half * l2_reg * (F::one() + const_ * const_) * w_norm2; gap @@ -452,8 +449,8 @@ fn duality_gap_mtl<'a, F: Float>( let dual_norm_xta = xta .map_axis(Axis(1), |x| x.dot(&x).sqrt()) .fold(F::zero(), |max_norm, &nrm| max_norm.max(nrm)); - let r_norm2 = r.fold(F::zero(), |sum, &rij| sum + rij.powi(2)); - let w_norm2 = w.fold(F::zero(), |sum, &wij| sum + wij.powi(2)); + let r_norm2 = r.map(|rij| rij.powi(2)).sum(); + let w_norm2 = w.map(|wij| wij.powi(2)).sum(); let (const_, mut gap) = if dual_norm_xta > l1_reg { let const_ = l1_reg / dual_norm_xta; let a_norm2 = r_norm2 * const_ * const_; @@ -580,7 +577,7 @@ mod tests { let mut resid = x.dot(beta); resid = &resid * -1.; resid = &resid - intercept + y; - let mut datafit = resid.fold(0., |sum, &r| sum + r.powi(2)); + let mut datafit = resid.map(|rij| rij.powi(2)).sum(); datafit /= 2.0 * x.shape()[0] as f64; datafit } @@ -594,7 +591,7 @@ mod tests { } fn elastic_net_mtl_penalty(beta: &Array2, alpha: f64) -> f64 { - let frob_norm = beta.fold(0., |sum, &beta_ij| sum + beta_ij.powi(2)); + let frob_norm = beta.map(|beta_ij| beta_ij.powi(2)).sum(); let l21_norm = beta .map_axis(Axis(1), |beta_j| (beta_j.dot(&beta_j)).sqrt()) .sum(); From 3b5f37dac97be59bc1e77cd5dec4bbb85767e443 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Thu, 3 Mar 2022 19:57:05 +0100 Subject: [PATCH 27/28] pass comments --- algorithms/linfa-elasticnet/src/algorithm.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 0961f831c..2ae3887a4 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -355,7 +355,7 @@ fn block_coordinate_descent<'a, F: Float>( } let old_w_j: ArrayView1 = w.slice(s![j, ..]); let x_j: ArrayView1 = x.slice(s![.., j]); - let norm_old_w_j = old_w_j.map(|wjt| wjt.powi(2)).sum().sqrt(); + let norm_old_w_j = old_w_j.dot(&old_w_j).sqrt(); if abs_diff_ne!(norm_old_w_j, F::zero()) { for i in 0..x.shape()[0] { r.slice_mut(s![i, ..]).scaled_add(x_j[i], &old_w_j); @@ -366,7 +366,7 @@ fn block_coordinate_descent<'a, F: Float>( &(block_soft_thresholding(tmp.view(), n_samples * l1_ratio * penalty) / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty)), ); - let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt(); + let norm_w_j = w.slice(s![j, ..]).dot(&w.slice(s![j, ..])).sqrt(); if abs_diff_ne!(norm_w_j, F::zero()) { for i in 0..x.shape()[0] { for t in 0..n_tasks { @@ -394,7 +394,7 @@ fn block_coordinate_descent<'a, F: Float>( } fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> Array1 { - let norm_x = x.map(|x| x.powi(2)).sum().sqrt(); + let norm_x = x.dot(&x).sqrt(); if norm_x < threshold { return Array1::::zeros(x.len()); } @@ -426,7 +426,7 @@ fn duality_gap<'a, F: Float>( } else { (F::one(), r_norm2) }; - let l1_norm = w.map(|w_i| w_i.abs()).sum(); + let l1_norm = w.iter().map(|w_i| w_i.abs()).sum(); gap += l1_reg * l1_norm - const_ * r.dot(&y) + half * l2_reg * (F::one() + const_ * const_) * w_norm2; gap @@ -449,8 +449,8 @@ fn duality_gap_mtl<'a, F: Float>( let dual_norm_xta = xta .map_axis(Axis(1), |x| x.dot(&x).sqrt()) .fold(F::zero(), |max_norm, &nrm| max_norm.max(nrm)); - let r_norm2 = r.map(|rij| rij.powi(2)).sum(); - let w_norm2 = w.map(|wij| wij.powi(2)).sum(); + let r_norm2 = r.iter().map(|rij| rij.powi(2)).sum(); + let w_norm2 = w.iter().map(|wij| wij.powi(2)).sum(); let (const_, mut gap) = if dual_norm_xta > l1_reg { let const_ = l1_reg / dual_norm_xta; let a_norm2 = r_norm2 * const_ * const_; @@ -577,7 +577,7 @@ mod tests { let mut resid = x.dot(beta); resid = &resid * -1.; resid = &resid - intercept + y; - let mut datafit = resid.map(|rij| rij.powi(2)).sum(); + let mut datafit = resid.iter().map(|rij| rij.powi(2)).sum(); datafit /= 2.0 * x.shape()[0] as f64; datafit } @@ -591,7 +591,7 @@ mod tests { } fn elastic_net_mtl_penalty(beta: &Array2, alpha: f64) -> f64 { - let frob_norm = beta.map(|beta_ij| beta_ij.powi(2)).sum(); + let frob_norm: f64 = beta.iter().map(|beta_ij| beta_ij.powi(2)).sum(); let l21_norm = beta .map_axis(Axis(1), |beta_j| (beta_j.dot(&beta_j)).sqrt()) .sum(); From bc1ad06b60d21b5fb4646e0fc861d05ce9bdeef3 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sun, 20 Mar 2022 14:22:08 +0100 Subject: [PATCH 28/28] WIP make compute_variance generic over the dimension --- algorithms/linfa-elasticnet/src/algorithm.rs | 44 ++++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 5e1f615a5..ee759144c 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -1,10 +1,14 @@ use approx::{abs_diff_eq, abs_diff_ne}; -use linfa::dataset::AsSingleTargets; -use ndarray::{s, Array1, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2}; +use ndarray::{ + s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Dimension, Ix1, Ix2, +}; use ndarray_linalg::{Inverse, Lapack}; use linfa::traits::{Fit, PredictInplace}; -use linfa::{dataset::Records, DatasetBase, Float}; +use linfa::{ + dataset::{AsMultiTargets, AsSingleTargets, AsTargets, Records}, + DatasetBase, Float, +}; use super::{ hyperparams::{ElasticNetValidParams, MultiTaskElasticNetValidParams}, @@ -57,10 +61,10 @@ where } } -impl Fit, ArrayBase, ElasticNetError> - for MultiTaskElasticNetValidParams +impl Fit, T, ElasticNetError> for MultiTaskElasticNetValidParams where F: Float + Lapack, + T: AsMultiTargets, D: Data, { type Object = MultiTaskElasticNet; @@ -75,20 +79,17 @@ where /// Returns a `FittedMultiTaskElasticNet` object which contains the fitted /// parameters and can be used to `predict` values of the target variables /// for new feature values. - fn fit( - &self, - dataset: &DatasetBase, ArrayBase>, - ) -> Result { - let target = dataset.targets(); + fn fit(&self, dataset: &DatasetBase, T>) -> Result { + let targets = dataset.targets().as_multi_targets(); let nsamples = dataset.nsamples(); - let ntasks = dataset.targets().ncols(); + let ntasks = targets.ncols(); let mut intercept = Array1::::zeros(ntasks); let mut y = Array2::::zeros((nsamples, ntasks)); for t in 0..ntasks { let (intercept_t, y_t) = - compute_intercept(self.with_intercept(), target.slice(s![.., t])); + compute_intercept(self.with_intercept(), targets.slice(s![.., t])); intercept[t] = intercept_t; y.slice_mut(s![.., t]).assign(&y_t); } @@ -462,26 +463,25 @@ fn duality_gap_mtl<'a, F: Float>( gap } -fn variance_params, D: Data, I: Dimension>( - ds: &DatasetBase, ArrayBase>, - y_est: ArrayBase, +fn variance_params, D: Data>( + ds: &DatasetBase, T>, + y_est: T, ) -> Result> { let nfeatures = ds.nfeatures(); let nsamples = ds.nsamples(); - let target = ds.targets(); + let target = ds.targets().as_targets(); let ndim = target.ndim(); let ntasks: usize = match ndim { 1 => 1, 2 => *target.shape().last().unwrap(), - _ => 0, + _ => { + return Err(ElasticNetError::IncorrectTargetShape); + } }; - // check that the dimension of the target is either a vector or a matrix - if ntasks == 0 { - return Err(ElasticNetError::IncorrectTargetShape); - } + let y_est = y_est.as_targets(); // check that we have enough samples if nsamples < nfeatures + 1 { @@ -489,7 +489,7 @@ fn variance_params, D: Data, } let var_target = - (target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures)); + (&target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures)); let inv_cov = ds.records().t().dot(ds.records()).inv();