From 81d0f1e5c2e0afde5ed8b7e4b441f6d85c9ae888 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 13:55:58 +0100
Subject: [PATCH 01/28] added block coordinate descent function

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 71 +++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)
diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 6eb540426..bbb1f8db9 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -1,5 +1,7 @@
 use approx::{abs_diff_eq, abs_diff_ne};
-use ndarray::{s, Array1, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2};
+use ndarray::{
+    s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2,
+};
 use ndarray_linalg::{Inverse, Lapack};
 
 use linfa::traits::{Fit, PredictInplace};
@@ -187,6 +189,73 @@ fn coordinate_descent<'a, F: Float>(
     (w, gap, n_steps)
 }
 
+fn block_coordinate_descent<'a, F: Float>(
+    x: ArrayView2<'a, F>,
+    y: ArrayView2<'a, F>,
+    tol: F,
+    max_steps: u32,
+    l1_ratio: F,
+    penalty: F,
+) -> (Array2<F>, F, u32) {
+    let n_samples = F::cast(x.shape()[0]);
+    let n_features = x.shape()[1];
+    let n_tasks = y.shape()[1];
+    // the parameters of the model
+    let mut w = Array2::<F>::zeros((n_features, n_tasks));
+    // the residuals: `Y - XW` (since W=0, this is just `Y` for now),
+    // the residuals are updated during the algorithm as the parameters change
+    let mut r = y.to_owned();
+    let mut n_steps = 0u32;
+    let norm_cols_x = x.map_axis(Axis(0), |col| col.dot(&col));
+    let mut gap = F::one() + tol;
+    let d_w_tol = tol;
+    let tol = tol * y.map(|yij| yij.powi(2)).sum();
+    while n_steps < max_steps {
+        let mut w_max = F::zero();
+        let mut d_w_max = F::zero();
+        for j in 0..n_features {
+            if abs_diff_eq!(norm_cols_x[j], F::zero()) {
+                continue;
+            }
+            let old_w_j: ArrayView1<F> = w.slice(s![j, ..]);
+            let x_j: ArrayView1<F> = x.slice(s![.., j]);
+            let norm_old_w_j = old_w_j.map(|wjt| wjt.powi(2)).sum().sqrt();
+            if abs_diff_ne!(norm_old_w_j, F::zero()) {
+                for i in 0..x.shape()[0] {
+                    for t in 0..n_tasks {
+                        r[[i, t]] += x_j[i] * old_w_j[t];
+                    }
+                }
+            }
+            let tmp = x_j.dot(&r);
+            // proximal step: TODO
+            let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt();
+            if abs_diff_ne!(norm_w_j, F::zero()) {
+                for i in 0..x.shape()[0] {
+                    for t in 0..n_tasks {
+                        r[[i, t]] -= x_j[i] * w[[j, t]];
+                    }
+                }
+            }
+            let d_w_j = (norm_w_j - norm_old_w_j).abs();
+            d_w_max = F::max(d_w_max, d_w_j);
+            w_max = F::max(w_max, norm_w_j);
+        }
+        n_steps += 1;
+
+        if n_steps == max_steps - 1 || abs_diff_eq!(w_max, F::zero()) || d_w_max / w_max < d_w_tol {
+            // We've hit one potentiel stopping criteria
+            // check duality gap for ultimate stopping criterion
+            gap = duality_gap_mtl(x.view(), y.view(), w.view(), r.view(), l1_ratio, penalty);
+            if gap < tol {
+                break;
+            }
+        }
+    }
+
+    (w, gap, n_steps)
+}
+
 fn duality_gap<'a, F: Float>(
     x: ArrayView2<'a, F>,
     y: ArrayView1<'a, F>,

From 7e8408e06512dacae696db525e3cff19b34e1e02 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 16:17:53 +0100
Subject: [PATCH 02/28] added duality_gap_mtl computation

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 40 ++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index bbb1f8db9..111a669e7 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -1,4 +1,5 @@
 use approx::{abs_diff_eq, abs_diff_ne};
+use ndarray::linalg::general_mat_mul;
 use ndarray::{
     s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2,
 };
@@ -286,6 +287,45 @@ fn duality_gap<'a, F: Float>(
     gap
 }
 
+fn duality_gap_mtl<'a, F: Float>(
+    x: ArrayView2<'a, F>,
+    y: ArrayView2<'a, F>,
+    w: ArrayView2<'a, F>,
+    r: ArrayView2<'a, F>,
+    l1_ratio: F,
+    penalty: F,
+) -> F {
+    let half = F::cast(0.5);
+    let n_samples = F::cast(x.shape()[0]);
+    let n_features = x.shape()[1];
+    let n_tasks = y.shape()[1];
+    let l1_reg = l1_ratio * penalty * n_samples;
+    let l2_reg = (F::one() - l1_ratio) * penalty * n_samples;
+    let mut xta = Array2::<F>::zeros((n_features, n_tasks));
+    general_mat_mul(F::one(), &x.t(), &r, F::one(), &mut xta);
+    xta = xta - &w * l2_reg;
+
+    let dual_norm_xta = xta
+        .map_axis(Axis(1), |x| x.dot(&x).sqrt())
+        .fold(F::zero(), |max_norm, &nrm| max_norm.max(nrm));
+    let r_norm2 = r.map(|rij| rij.powi(2)).sum();
+    let w_norm2 = w.map(|wij| wij.powi(2)).sum();
+    let (const_, mut gap) = if dual_norm_xta > l1_reg {
+        let const_ = l1_reg / dual_norm_xta;
+        let a_norm2 = r_norm2 * const_ * const_;
+        (const_, half * (r_norm2 + a_norm2))
+    } else {
+        (F::one(), r_norm2)
+    };
+    let mut rty = Array2::<F>::zeros((n_tasks, n_tasks));
+    general_mat_mul(F::one(), &r.t(), &y, F::one(), &mut rty);
+    let trace_rty = rty.diag().sum();
+    let l21_norm = w.map_axis(Axis(1), |wj| (wj.dot(&wj)).sqrt()).sum();
+    gap += l1_reg * l21_norm - const_ * trace_rty
+        + half * l2_reg * (F::one() + const_ * const_) * w_norm2;
+    gap
+}
+
 fn variance_params<F: Float + Lapack, T: AsTargets<Elem = F>, D: Data<Elem = F>>(
     ds: &DatasetBase<ArrayBase<D, Ix2>, T>,
     y_est: Array1<F>,

From 1b69b8e19d620105db44868f1abbc95154e59a9f Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 16:23:24 +0100
Subject: [PATCH 03/28] ENH cd pass to be consistent with bcd

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 33 +++++++++++---------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 111a669e7..36e480e05 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -156,25 +156,28 @@ fn coordinate_descent<'a, F: Float>(
     while n_steps < max_steps {
         let mut w_max = F::zero();
         let mut d_w_max = F::zero();
-        for ii in 0..n_features {
-            if abs_diff_eq!(norm_cols_x[ii], F::zero()) {
+        for j in 0..n_features {
+            if abs_diff_eq!(norm_cols_x[j], F::zero()) {
                 continue;
             }
-            let w_ii = w[ii];
-            let x_slc: ArrayView1<F> = x.slice(s![.., ii]);
-            if abs_diff_ne!(w_ii, F::zero()) {
-                // FIXME: direct addition with loop might be faster as it does not have to allocate
-                r += &(&x_slc * w_ii);
+            let old_w_j = w[j];
+            let x_j: ArrayView1<F> = x.slice(s![.., j]);
+            if abs_diff_ne!(old_w_j, F::zero()) {
+                for i in 0..x.shape()[0] {
+                    r[i] += x_j[i] * old_w_j;
+                }
             }
-            let tmp: F = x_slc.dot(&r);
-            w[ii] = tmp.signum() * F::max(tmp.abs() - n_samples * l1_ratio * penalty, F::zero())
-                / (norm_cols_x[ii] + n_samples * (F::one() - l1_ratio) * penalty);
-            if abs_diff_ne!(w[ii], F::zero()) {
-                r -= &(&x_slc * w[ii]);
+            let tmp: F = x_j.dot(&r);
+            w[j] = tmp.signum() * F::max(tmp.abs() - n_samples * l1_ratio * penalty, F::zero())
+                / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty);
+            if abs_diff_ne!(w[j], F::zero()) {
+                for i in 0..x.shape()[0] {
+                    r[i] -= x_j[i] * old_w_j;
+                }
             }
-            let d_w_ii = (w[ii] - w_ii).abs();
-            d_w_max = F::max(d_w_max, d_w_ii);
-            w_max = F::max(w_max, w[ii].abs());
+            let d_w_j = (w[j] - old_w_j).abs();
+            d_w_max = F::max(d_w_max, d_w_j);
+            w_max = F::max(w_max, w[j].abs());
         }
         n_steps += 1;
 

From e5b76e74853c92fbed7851c2f6c78895da729f9a Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 16:53:11 +0100
Subject: [PATCH 04/28] added prox operator for MTL Enet

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 36e480e05..dbc7ba04d 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -232,7 +232,10 @@ fn block_coordinate_descent<'a, F: Float>(
                 }
             }
             let tmp = x_j.dot(&r);
-            // proximal step: TODO
+            w.slice_mut(s![j, ..]).assign(
+                &(block_soft_thresholding(tmp.view(), n_samples * l1_ratio * penalty)
+                    / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty)),
+            );
             let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt();
             if abs_diff_ne!(norm_w_j, F::zero()) {
                 for i in 0..x.shape()[0] {
@@ -260,6 +263,18 @@ fn block_coordinate_descent<'a, F: Float>(
     (w, gap, n_steps)
 }
 
+fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> Array1<F> {
+    let norm_x = x.map(|&xi| xi.powi(2)).sum().sqrt();
+    let mut _res = Array1::<F>::zeros(x.len());
+    if norm_x >= threshold {
+        let scal = F::one() - threshold / norm_x;
+        for i in 0..x.len() {
+            _res[i] = scal * x[i];
+        }
+    }
+    _res
+}
+
 fn duality_gap<'a, F: Float>(
     x: ArrayView2<'a, F>,
     y: ArrayView1<'a, F>,

From 196a2a54b6041279427ceb60204095f8bdb81e75 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 17:10:39 +0100
Subject: [PATCH 05/28] added helper functions for tests

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 49 +++++++++++++++++++-
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index dbc7ba04d..84f9a7bae 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -386,9 +386,10 @@ pub fn compute_intercept<F: Float>(
 
 #[cfg(test)]
 mod tests {
-    use super::{coordinate_descent, ElasticNet};
+    use super::{block_coordinate_descent, coordinate_descent, ElasticNet};
     use approx::assert_abs_diff_eq;
-    use ndarray::{array, s, Array, Array1, Array2};
+    use ndarray::linalg::general_mat_mul;
+    use ndarray::{array, s, Array, Array1, Array2, Axis};
     use ndarray_rand::rand::SeedableRng;
     use ndarray_rand::rand_distr::Uniform;
     use ndarray_rand::RandomExt;
@@ -411,6 +412,17 @@ mod tests {
         squared_error(x, y, intercept, beta) + lambda * elastic_net_penalty(beta, alpha)
     }
 
+    fn elastic_net_multi_task_objective(
+        x: &Array2<f64>,
+        y: &Array2<f64>,
+        intercept: &Array1<f64>,
+        beta: &Array2<f64>,
+        alpha: f64,
+        lambda: f64,
+    ) -> f64 {
+        squared_error_mtl(x, y, intercept, beta) + lambda * elastic_net_mtl_penalty(beta, alpha)
+    }
+
     fn squared_error(x: &Array2<f64>, y: &Array1<f64>, intercept: f64, beta: &Array1<f64>) -> f64 {
         let mut resid = -x.dot(beta);
         resid -= intercept;
@@ -423,6 +435,22 @@ mod tests {
         result
     }
 
+    fn squared_error_mtl(
+        x: &Array2<f64>,
+        y: &Array2<f64>,
+        intercept: &Array1<f64>,
+        beta: &Array2<f64>,
+    ) -> f64 {
+        let mut resid = Array2::<f64>::zeros((x.shape()[0], y.shape()[1]));
+        general_mat_mul(1., &x, &beta, 1., &mut resid);
+        resid = &resid * -1.;
+        resid = resid - intercept;
+        resid = resid + y;
+        let mut datafit = resid.map(|r| r.powi(2)).sum();
+        datafit /= 2.0 * y.len() as f64;
+        datafit
+    }
+
     fn elastic_net_penalty(beta: &Array1<f64>, alpha: f64) -> f64 {
         let mut penalty = 0.0;
         for beta_j in beta {
@@ -431,6 +459,14 @@ mod tests {
         penalty
     }
 
+    fn elastic_net_mtl_penalty(beta: &Array2<f64>, alpha: f64) -> f64 {
+        let frob_norm = beta.map(|beta_ij| beta_ij.powi(2)).sum();
+        let l21_norm = beta
+            .map_axis(Axis(1), |beta_j| (beta_j.dot(&beta_j)).sqrt())
+            .sum();
+        (1.0 - alpha) / 2.0 * frob_norm + alpha * l21_norm
+    }
+
     #[test]
     fn elastic_net_penalty_works() {
         let beta = array![-2.0, 1.0];
@@ -448,6 +484,9 @@ mod tests {
         assert_abs_diff_eq!(elastic_net_penalty(&beta2, 0.0), 0.0);
     }
 
+    #[test]
+    fn elastic_net_mtl_penalty_works() {}
+
     #[test]
     fn squared_error_works() {
         let x = array![[2.0, 1.0], [-1.0, 2.0]];
@@ -456,6 +495,9 @@ mod tests {
         assert_abs_diff_eq!(squared_error(&x, &y, 0.0, &beta), 0.25);
     }
 
+    #[test]
+    fn squared_error_mtl_works() {}
+
     #[test]
     fn coordinate_descent_lowers_objective() {
         let x = array![[1.0, 0.0], [0.0, 1.0]];
@@ -470,6 +512,9 @@ mod tests {
         assert!(objective_start > objective_end);
     }
 
+    #[test]
+    fn block_coordinate_descent_lowers_objective() {}
+
     #[test]
     fn lasso_zero_works() {
         let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.]));

From dc91b627347a7ce99a67897eaed0e78ac7c8fc8a Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 17:17:01 +0100
Subject: [PATCH 06/28] fix failing CD tests

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 84f9a7bae..746952599 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -172,7 +172,7 @@ fn coordinate_descent<'a, F: Float>(
                 / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty);
             if abs_diff_ne!(w[j], F::zero()) {
                 for i in 0..x.shape()[0] {
-                    r[i] -= x_j[i] * old_w_j;
+                    r[i] -= x_j[i] * w[j];
                 }
             }
             let d_w_j = (w[j] - old_w_j).abs();

From 3bb53338e3a52c808fd17d8f44e7f269d750e7ea Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 20:01:38 +0100
Subject: [PATCH 07/28] working ent mtl penalties

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 52 ++++++++++++++++++--
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 746952599..9a2d52246 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -444,10 +444,13 @@ mod tests {
         let mut resid = Array2::<f64>::zeros((x.shape()[0], y.shape()[1]));
         general_mat_mul(1., &x, &beta, 1., &mut resid);
         resid = &resid * -1.;
-        resid = resid - intercept;
+        for i in 0..resid.shape()[0] {
+            let _res = &resid.slice(s![i, ..]) - intercept;
+            resid.slice_mut(s![i, ..]).assign(&_res);
+        }
         resid = resid + y;
         let mut datafit = resid.map(|r| r.powi(2)).sum();
-        datafit /= 2.0 * y.len() as f64;
+        datafit /= 2.0 * x.shape()[0] as f64;
         datafit
     }
 
@@ -485,7 +488,29 @@ mod tests {
     }
 
     #[test]
-    fn elastic_net_mtl_penalty_works() {}
+    fn elastic_net_mtl_penalty_works() {
+        let beta = array![[-2.0, 1.0, 3.0], [3.0, 1.5, -1.7]];
+        assert_abs_diff_eq!(
+            elastic_net_mtl_penalty(&beta, 0.7),
+            9.472383565516601,
+            epsilon = 1e-12
+        );
+        assert_abs_diff_eq!(
+            elastic_net_mtl_penalty(&beta, 1.0),
+            7.501976522166574,
+            epsilon = 1e-12
+        );
+        assert_abs_diff_eq!(
+            elastic_net_mtl_penalty(&beta, 0.2),
+            12.756395304433315,
+            epsilon = 1e-12
+        );
+
+        let beta2 = array![[0., 0.], [0., 0.], [0., 0.]];
+        assert_abs_diff_eq!(elastic_net_mtl_penalty(&beta2, 0.8), 0.0);
+        assert_abs_diff_eq!(elastic_net_mtl_penalty(&beta2, 1.2), 0.0);
+        assert_abs_diff_eq!(elastic_net_mtl_penalty(&beta2, 0.8), 0.0);
+    }
 
     #[test]
     fn squared_error_works() {
@@ -496,7 +521,24 @@ mod tests {
     }
 
     #[test]
-    fn squared_error_mtl_works() {}
+    fn squared_error_mtl_works() {
+        let x = array![[1.2, 2.3], [-1.3, 0.3], [-1.3, 0.1]];
+        let y = array![
+            [0.2, 1.0, 0.0, 1.],
+            [-0.3, 0.7, 0.1, 2.],
+            [-0.3, 0.7, 2.3, 3.]
+        ];
+        let beta = array![[2.3, 4.5, 1.2, -3.4], [1.2, -3.4, 0.7, -1.2]];
+        assert_abs_diff_eq!(
+            squared_error_mtl(&x, &y, &array![0., 0., 0., 0.], &beta),
+            41.66298333333333
+        );
+        let intercept = array![1., 3., 2., 0.3];
+        assert_abs_diff_eq!(
+            squared_error_mtl(&x, &y, &intercept, &beta),
+            29.059983333333335
+        );
+    }
 
     #[test]
     fn coordinate_descent_lowers_objective() {
@@ -696,7 +738,7 @@ mod tests {
             [-5.514554978810590376e-03,5.068011873981870252e-02,-1.590626280073640167e-02,-6.764228304218700139e-02,4.934129593323050011e-02,7.916527725369119917e-02,-2.867429443567860031e-02,3.430885887772629900e-02,-1.811826730789670159e-02,4.448547856271539702e-02],
             [4.170844488444359899e-02,5.068011873981870252e-02,-1.590626280073640167e-02,1.728186074811709910e-02,-3.734373413344069942e-02,-1.383981589779990050e-02,-2.499265663159149983e-02,-1.107951979964190078e-02,-4.687948284421659950e-02,1.549073015887240078e-02],
             [-4.547247794002570037e-02,-4.464163650698899782e-02,3.906215296718960200e-02,1.215130832538269907e-03,1.631842733640340160e-02,1.528299104862660025e-02,-2.867429443567860031e-02,2.655962349378539894e-02,4.452837402140529671e-02,-2.593033898947460017e-02],
-            [-4.547247794002570037e-02,-4.464163650698899782e-02,-7.303030271642410587e-02,-8.141376581713200000e-02,8.374011738825870577e-02,2.780892952020790065e-02,1.738157847891100005e-01,-3.949338287409189657e-02,-4.219859706946029777e-03,3.064409414368320182e-03]         
+            [-4.547247794002570037e-02,-4.464163650698899782e-02,-7.303030271642410587e-02,-8.141376581713200000e-02,8.374011738825870577e-02,2.780892952020790065e-02,1.738157847891100005e-01,-3.949338287409189657e-02,-4.219859706946029777e-03,3.064409414368320182e-03]
         ];
         #[rustfmt::skip]
         let y = array![2.33e+02, 9.1e+01, 1.11e+02, 1.52e+02, 1.2e+02, 6.70e+01, 3.1e+02, 9.4e+01, 1.83e+02, 6.6e+01, 1.73e+02, 7.2e+01, 4.9e+01, 6.4e+01, 4.8e+01, 1.78e+02, 1.04e+02, 1.32e+02, 2.20e+02, 5.7e+01];

From f9f9959b6021951f200bcc5dad43534d8162f9fc Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 20:09:42 +0100
Subject: [PATCH 08/28] bcd lower objective test pass

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 9a2d52246..f1dfc7938 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -555,7 +555,20 @@ mod tests {
     }
 
     #[test]
-    fn block_coordinate_descent_lowers_objective() {}
+    fn block_coordinate_descent_lowers_objective() {
+        let x = array![[1.0, 0., -0.3, 3.2], [0.3, 1.2, -0.6, 1.2]];
+        let y = array![[0.3, -1.2, 0.7], [1.4, -3.2, 0.2]];
+        let beta = array![[0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.]];
+        let intercept = array![0., 0., 0.];
+        let alpha = 0.4;
+        let lambda = 0.002;
+        let objective_start =
+            elastic_net_multi_task_objective(&x, &y, &intercept, &beta, alpha, lambda);
+        let opt_result = block_coordinate_descent(x.view(), y.view(), 1e-4, 3, alpha, lambda);
+        let objective_end =
+            elastic_net_multi_task_objective(&x, &y, &intercept, &opt_result.0, alpha, lambda);
+        assert!(objective_start > objective_end);
+    }
 
     #[test]
     fn lasso_zero_works() {

From e23d876930ca91f223bee38caeba667acea624e8 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 20:28:54 +0100
Subject: [PATCH 09/28] added MultiTaskEnet struct

---
 algorithms/linfa-elasticnet/src/lib.rs | 46 +++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs
index 789f27c86..94d480cf6 100644
--- a/algorithms/linfa-elasticnet/src/lib.rs
+++ b/algorithms/linfa-elasticnet/src/lib.rs
@@ -1,7 +1,7 @@
 #![doc = include_str!("../README.md")]
 
 use linfa::Float;
-use ndarray::Array1;
+use ndarray::{Array1, Array2};
 
 #[cfg(feature = "serde")]
 use serde_crate::{Deserialize, Serialize};
@@ -65,3 +65,47 @@ impl<F: Float> ElasticNet<F> {
         ElasticNetParams::new().l1_ratio(F::one())
     }
 }
+
+/// MultiTask Elastic Net model
+///
+/// This struct contains the parameters of a fitted multi-task elastic net model. This includes the
+/// coefficients (a 2-dimensional array), (optionally) intercept (a 1-dimensional array), duality gaps
+/// and the number of steps needed in the computation.
+///
+/// ## Model implementation
+///
+/// The block coordinate descent is widely used to solve generalized linear models optimization problems,
+/// like Group Lasso, MultiTask Ridge or MultiTask Lasso. It cycles through a group of parameters and update
+/// the groups separately, holding all the others fixed. The optimization routine stops when a criterion is
+/// satisfied (dual sub-optimality gap or change in coefficients).
+pub struct MultiTaskElasticNet<F> {
+    hyperplane: Array2<F>,
+    intercept: Array1<F>,
+    duality_gap: F,
+    n_steps: u32,
+    variance: Result<Array2<F>>,
+}
+
+impl<F: Float> MultiTaskElasticNet<F> {
+    /// Create a default parameter set for construction of MultiTaskElasticNet model
+    ///
+    /// By default, an intercept will be fitted. To disable fitting an
+    /// intercept, call `.with_intercept(false)` before calling `.fit()`.
+    ///
+    /// To additionally normalize the feature matrix before fitting, call
+    /// `fit_intercept_and_normalize()` before calling `fit()`. The feature
+    /// matrix will not be normalized by default.
+    pub fn params() -> ElasticNetParams<F> {
+        ElasticNetParams::new()
+    }
+
+    /// Create a multi-task ridge only model
+    pub fn ridge() -> ElasticNetParams<F> {
+        ElasticNetParams::new().l1_ratio(F::zero())
+    }
+
+    /// Create a multi-task Lasso only model
+    pub fn lasso() -> ElasticNetParams<F> {
+        ElasticNetParams::new().l1_ratio(F::one())
+    }
+}

From f316c95527177ae99a992eb4a9649b521cdb828b Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 20:32:51 +0100
Subject: [PATCH 10/28] added MTENET documentation

---
 algorithms/linfa-elasticnet/src/hyperparams.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs
index 8cb40c7d9..f8dc9a96d 100644
--- a/algorithms/linfa-elasticnet/src/hyperparams.rs
+++ b/algorithms/linfa-elasticnet/src/hyperparams.rs
@@ -54,6 +54,14 @@ impl<F: Float> ElasticNetValidParams<F> {
 ///     + 0.5 * penalty * (1 - l1_ratio) * ||w||^2_2
 /// ```
 ///
+/// The multi-task version (Y becomes a measurement matrix) is also supported and
+/// solves the following objective function:
+/// ```ignore
+/// 1 / (2 * n_samples) * || Y - XW ||^2_F
+///     + penalty * l1_ratio * ||W||_2,1
+///     + 0.5 * penalty * (1 - l1_ratio) * ||W||^2_F
+/// ```
+///
 /// The parameter set can be verified into a
 /// [`ElasticNetValidParams`](crate::hyperparams::ElasticNetValidParams) by calling
 /// [ParamGuard::check](Self::check). It is also possible to directly fit a model with

From 1ae652226e423903db95d0957acc91ac307ef8b0 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 20:39:44 +0100
Subject: [PATCH 11/28] added API MTENET

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 33 +++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index f1dfc7938..67bcbdffe 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -11,7 +11,9 @@ use linfa::{
     DatasetBase, Float,
 };
 
-use super::{hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, Result};
+use super::{
+    hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, MultiTaskElasticNet, Result,
+};
 
 impl<F, D, T> Fit<ArrayBase<D, Ix2>, T, ElasticNetError> for ElasticNetValidParams<F>
 where
@@ -133,6 +135,35 @@ impl<F: Float> ElasticNet<F> {
     }
 }
 
+/// View the fitted parameters and make predictions with a fitted
+/// elastic net model
+impl<F: Float> MultiTaskElasticNet<F> {
+    /// Get the fitted hyperplane
+    pub fn hyperplane(&self) -> &Array2<F> {
+        &self.hyperplane
+    }
+
+    /// Get the fitted intercept, [0., ..., 0.] if no intercept was fitted
+    /// Note that there are as many intercepts as tasks
+    pub fn intercept(&self) -> &Array1<F> {
+        &self.intercept
+    }
+
+    /// Get the number of steps taken in optimization algorithm
+    pub fn n_steps(&self) -> u32 {
+        self.n_steps
+    }
+
+    /// Get the duality gap at the end of the optimization algorithm
+    pub fn duality_gap(&self) -> F {
+        self.duality_gap
+    }
+
+    // TODO: Z-score
+
+    // TODO: confidence level
+}
+
 fn coordinate_descent<'a, F: Float>(
     x: ArrayView2<'a, F>,
     y: ArrayView1<'a, F>,

From f13a150e4b3e84a5a89c6dc68e13edc11cad2872 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Wed, 19 Jan 2022 22:02:23 +0100
Subject: [PATCH 12/28] added variance, z-score, conf interval for multitask
 ENET

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 121 ++++++++++++++++++-
 src/dataset/impl_targets.rs                  |  14 ++-
 src/dataset/mod.rs                           |   8 ++
 3 files changed, 139 insertions(+), 4 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 67bcbdffe..acc9a20af 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -7,7 +7,7 @@ use ndarray_linalg::{Inverse, Lapack};
 
 use linfa::traits::{Fit, PredictInplace};
 use linfa::{
-    dataset::{AsTargets, Records},
+    dataset::{AsTargets, MultiTaskTarget, Records},
     DatasetBase, Float,
 };
 
@@ -61,6 +61,64 @@ where
     }
 }
 
+// impl<F, D, T> Fit<ArrayBase<D, Ix2>, T, ElasticNetError> for ElasticNetValidParams<F>
+// where
+//     F: Float + Lapack,
+//     D: Data<Elem = F>,
+//     T: AsTargets<Elem = F>,
+// {
+//     type Object = MultiTaskElasticNet<F>;
+
+//     /// Fit a multi-task Elastic Net model given a feature matrix `x` and a target
+//     /// matrix `y`.
+//     ///
+//     /// The feature matrix `x` must have shape `(n_samples, n_features)`
+//     ///
+//     /// The target variable `y` must have shape `(n_samples, n_tasks)`
+//     ///
+//     /// Returns a `FittedMultiTaskElasticNet` object which contains the fitted
+//     /// parameters and can be used to `predict` values of the target variables
+//     /// for new feature values.
+//     fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object> {
+//         let target = dataset.targets();
+
+//         let (intercept, y) = compute_intercept_multi_task(self.with_intercept(), target);
+//         let (hyperplane, duality_gap, n_steps) = block_coordinate_descent(
+//             dataset.records().view(),
+//             y.view(),
+//             self.tolerance(),
+//             self.max_iterations(),
+//             self.l1_ratio(),
+//             self.penalty(),
+//         );
+
+//         let mut y_est = Array2::<F>::zeros((dataset.nsamples(), dataset.ntargets()));
+//         general_mat_mul(
+//             F::one(),
+//             &dataset.records(),
+//             &hyperplane,
+//             F::one(),
+//             &mut y_est,
+//         );
+//         // Adding intercept
+//         for i in 0..dataset.n_samples() {
+//             let _res = &y_est.slice(s![i, ..]) + intercept;
+//             y_est.slice_mut(s![i, ..]).assign(&_res);
+//         }
+
+//         // TODO: compute variance
+//         let variance = variance_params_multi_task(dataset, y_est);
+
+//         Ok(MultiTaskElasticNet {
+//             hyperplane,
+//             intercept,
+//             duality_gap,
+//             n_steps,
+//             variance,
+//         })
+//     }
+// }
+
 impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array1<F>> for ElasticNet<F> {
     /// Given an input matrix `X`, with shape `(n_samples, n_features)`,
     /// `predict` returns the target variable according to elastic net
@@ -159,9 +217,36 @@ impl<F: Float> MultiTaskElasticNet<F> {
         self.duality_gap
     }
 
-    // TODO: Z-score
+    /// Calculate the Z score
+    pub fn z_score(&self) -> Result<Array1<F>> {
+        self.variance
+            .as_ref()
+            .map(|variance| {
+                self.hyperplane
+                    .iter()
+                    .zip(variance.iter())
+                    .map(|(a, b)| *a / b.sqrt())
+                    .collect()
+            })
+            .map_err(|err| err.clone())
+    }
+
+    /// Calculate the confidence level
+    pub fn confidence_95th(&self) -> Result<Array1<(F, F)>> {
+        // the 95th percentile of our confidence level
+        let p = F::cast(1.645);
 
-    // TODO: confidence level
+        self.variance
+            .as_ref()
+            .map(|variance| {
+                self.hyperplane
+                    .iter()
+                    .zip(variance.iter())
+                    .map(|(a, b)| (*a - p * b.sqrt(), *a + p * b.sqrt()))
+                    .collect()
+            })
+            .map_err(|err| err.clone())
+    }
 }
 
 fn coordinate_descent<'a, F: Float>(
@@ -400,6 +485,36 @@ fn variance_params<F: Float + Lapack, T: AsTargets<Elem = F>, D: Data<Elem = F>>
     }
 }
 
+fn variance_params_multi_task<F, T, D>(
+    ds: &DatasetBase<ArrayBase<D, Ix2>, T>,
+    y_est: Array2<F>,
+) -> Result<Array1<F>>
+where
+    F: Float + Lapack,
+    T: AsTargets<Elem = F> + MultiTaskTarget<Elem = F>,
+    D: Data<Elem = F>,
+{
+    let nfeatures = ds.nfeatures();
+    let nsamples = ds.nsamples();
+    let ntasks = ds.targets().ntasks();
+
+    let target = ds.targets().as_multi_targets();
+
+    if nsamples < nfeatures + 1 {
+        return Err(ElasticNetError::NotEnoughSamples);
+    }
+
+    let var_target =
+        (&target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures));
+
+    let inv_cov = ds.records().t().dot(ds.records()).inv();
+
+    match inv_cov {
+        Ok(inv_cov) => Ok(inv_cov.diag().mapv(|x| var_target * x)),
+        Err(_) => Err(ElasticNetError::IllConditioned),
+    }
+}
+
 /// Compute the intercept as the mean of `y` and center `y` if an intercept should
 /// be used, use `0.0` as intercept and leave `y` unchanged otherwise.
 pub fn compute_intercept<F: Float>(
diff --git a/src/dataset/impl_targets.rs b/src/dataset/impl_targets.rs
index a72170e46..6949acbd0 100644
--- a/src/dataset/impl_targets.rs
+++ b/src/dataset/impl_targets.rs
@@ -2,7 +2,7 @@ use std::collections::HashMap;
 
 use super::{
     AsProbabilities, AsTargets, AsTargetsMut, CountedTargets, DatasetBase, FromTargetArray, Label,
-    Labels, Pr, Records,
+    Labels, MultiTaskTarget, Pr, Records,
 };
 use ndarray::{
     Array1, Array2, ArrayBase, ArrayView2, ArrayViewMut2, Axis, CowArray, Data, DataMut, Dimension,
@@ -17,6 +17,18 @@ impl<'a, L, S: Data<Elem = L>> AsTargets for ArrayBase<S, Ix1> {
     }
 }
 
+impl<'a, L, S: Data<Elem = L>> MultiTaskTarget for ArrayBase<S, Ix2> {
+    type Elem = L;
+
+    fn nsamples(&self) -> usize {
+        self.shape()[0]
+    }
+
+    fn ntasks(&self) -> usize {
+        self.shape()[1]
+    }
+}
+
 impl<'a, L: Clone + 'a, S: Data<Elem = L>> FromTargetArray<'a, L> for ArrayBase<S, Ix2> {
     type Owned = ArrayBase<OwnedRepr<L>, Ix2>;
     type View = ArrayBase<ViewRepr<&'a L>, Ix2>;
diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs
index dc19a3cc0..f21a45b95 100644
--- a/src/dataset/mod.rs
+++ b/src/dataset/mod.rs
@@ -198,6 +198,14 @@ pub trait Records: Sized {
     fn nfeatures(&self) -> usize;
 }
 
+/// MultiTask trait
+pub trait MultiTaskTarget: Sized {
+    type Elem;
+
+    fn nsamples(&self) -> usize;
+    fn ntasks(&self) -> usize;
+}
+
 /// Return a reference to single or multiple target variables
 pub trait AsTargets {
     type Elem;

From b3479634debfa677dfff173d88a23a9531f453e6 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sat, 22 Jan 2022 15:20:55 +0100
Subject: [PATCH 13/28] added multi-task estimators

---
 algorithms/linfa-elasticnet/src/algorithm.rs  | 130 ++++++++++--------
 .../linfa-elasticnet/src/hyperparams.rs       |  30 ++++
 algorithms/linfa-elasticnet/src/lib.rs        |   2 +-
 3 files changed, 103 insertions(+), 59 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index acc9a20af..c6b136d00 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -12,7 +12,8 @@ use linfa::{
 };
 
 use super::{
-    hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, MultiTaskElasticNet, Result,
+    hyperparams::{ElasticNetValidParams, MultiTaskElasticNetValidParams},
+    ElasticNet, ElasticNetError, MultiTaskElasticNet, Result,
 };
 
 impl<F, D, T> Fit<ArrayBase<D, Ix2>, T, ElasticNetError> for ElasticNetValidParams<F>
@@ -61,63 +62,76 @@ where
     }
 }
 
-// impl<F, D, T> Fit<ArrayBase<D, Ix2>, T, ElasticNetError> for ElasticNetValidParams<F>
-// where
-//     F: Float + Lapack,
-//     D: Data<Elem = F>,
-//     T: AsTargets<Elem = F>,
-// {
-//     type Object = MultiTaskElasticNet<F>;
-
-//     /// Fit a multi-task Elastic Net model given a feature matrix `x` and a target
-//     /// matrix `y`.
-//     ///
-//     /// The feature matrix `x` must have shape `(n_samples, n_features)`
-//     ///
-//     /// The target variable `y` must have shape `(n_samples, n_tasks)`
-//     ///
-//     /// Returns a `FittedMultiTaskElasticNet` object which contains the fitted
-//     /// parameters and can be used to `predict` values of the target variables
-//     /// for new feature values.
-//     fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object> {
-//         let target = dataset.targets();
-
-//         let (intercept, y) = compute_intercept_multi_task(self.with_intercept(), target);
-//         let (hyperplane, duality_gap, n_steps) = block_coordinate_descent(
-//             dataset.records().view(),
-//             y.view(),
-//             self.tolerance(),
-//             self.max_iterations(),
-//             self.l1_ratio(),
-//             self.penalty(),
-//         );
-
-//         let mut y_est = Array2::<F>::zeros((dataset.nsamples(), dataset.ntargets()));
-//         general_mat_mul(
-//             F::one(),
-//             &dataset.records(),
-//             &hyperplane,
-//             F::one(),
-//             &mut y_est,
-//         );
-//         // Adding intercept
-//         for i in 0..dataset.n_samples() {
-//             let _res = &y_est.slice(s![i, ..]) + intercept;
-//             y_est.slice_mut(s![i, ..]).assign(&_res);
-//         }
-
-//         // TODO: compute variance
-//         let variance = variance_params_multi_task(dataset, y_est);
-
-//         Ok(MultiTaskElasticNet {
-//             hyperplane,
-//             intercept,
-//             duality_gap,
-//             n_steps,
-//             variance,
-//         })
-//     }
-// }
+impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>, ElasticNetError>
+    for MultiTaskElasticNetValidParams<F>
+where
+    F: Float + Lapack,
+    D: Data<Elem = F>,
+{
+    type Object = MultiTaskElasticNet<F>;
+
+    /// Fit a multi-task Elastic Net model given a feature matrix `x` and a target
+    /// matrix `y`.
+    ///
+    /// The feature matrix `x` must have shape `(n_samples, n_features)`
+    ///
+    /// The target variable `y` must have shape `(n_samples, n_tasks)`
+    ///
+    /// Returns a `FittedMultiTaskElasticNet` object which contains the fitted
+    /// parameters and can be used to `predict` values of the target variables
+    /// for new feature values.
+    fn fit(
+        &self,
+        dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>>,
+    ) -> Result<Self::Object> {
+        let target = dataset.targets();
+        let nsamples = dataset.nsamples();
+        let ntasks = target.ntasks();
+
+        let mut intercept = Array1::<F>::zeros(ntasks);
+        let mut y = Array2::<F>::zeros((nsamples, ntasks));
+
+        for t in 0..target.ntasks() {
+            let (intercept_t, y_t) =
+                compute_intercept(self.with_intercept(), target.slice(s![.., t]));
+            intercept[t] = intercept_t;
+            y.slice_mut(s![.., t]).assign(&y_t);
+        }
+
+        let (hyperplane, duality_gap, n_steps) = block_coordinate_descent(
+            dataset.records().view(),
+            y.view(),
+            self.tolerance(),
+            self.max_iterations(),
+            self.l1_ratio(),
+            self.penalty(),
+        );
+
+        let mut y_est = Array2::<F>::zeros((dataset.nsamples(), dataset.ntargets()));
+        general_mat_mul(
+            F::one(),
+            &dataset.records(),
+            &hyperplane,
+            F::one(),
+            &mut y_est,
+        );
+        // Adding intercept
+        for i in 0..dataset.nsamples() {
+            let _res = &y_est.slice(s![i, ..]) + &intercept;
+            y_est.slice_mut(s![i, ..]).assign(&_res);
+        }
+
+        let variance = variance_params_multi_task(dataset, y_est);
+
+        Ok(MultiTaskElasticNet {
+            hyperplane,
+            intercept,
+            duality_gap,
+            n_steps,
+            variance,
+        })
+    }
+}
 
 impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array1<F>> for ElasticNet<F> {
     /// Given an input matrix `X`, with shape `(n_samples, n_features)`,
diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs
index f8dc9a96d..661c0edf1 100644
--- a/algorithms/linfa-elasticnet/src/hyperparams.rs
+++ b/algorithms/linfa-elasticnet/src/hyperparams.rs
@@ -23,6 +23,14 @@ pub struct ElasticNetValidParams<F> {
     tolerance: F,
 }
 
+pub struct MultiTaskElasticNetValidParams<F> {
+    penalty: F,
+    l1_ratio: F,
+    with_intercept: bool,
+    max_iterations: u32,
+    tolerance: F,
+}
+
 impl<F: Float> ElasticNetValidParams<F> {
     pub fn penalty(&self) -> F {
         self.penalty
@@ -45,6 +53,28 @@ impl<F: Float> ElasticNetValidParams<F> {
     }
 }
 
+impl<F: Float> MultiTaskElasticNetValidParams<F> {
+    pub fn penalty(&self) -> F {
+        self.penalty
+    }
+
+    pub fn l1_ratio(&self) -> F {
+        self.l1_ratio
+    }
+
+    pub fn with_intercept(&self) -> bool {
+        self.with_intercept
+    }
+
+    pub fn max_iterations(&self) -> u32 {
+        self.max_iterations
+    }
+
+    pub fn tolerance(&self) -> F {
+        self.tolerance
+    }
+}
+
 /// A hyper-parameter set during construction
 ///
 /// Configures and minimizes the following objective function:
diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs
index 94d480cf6..04203000e 100644
--- a/algorithms/linfa-elasticnet/src/lib.rs
+++ b/algorithms/linfa-elasticnet/src/lib.rs
@@ -83,7 +83,7 @@ pub struct MultiTaskElasticNet<F> {
     intercept: Array1<F>,
     duality_gap: F,
     n_steps: u32,
-    variance: Result<Array2<F>>,
+    variance: Result<Array1<F>>,
 }
 
 impl<F: Float> MultiTaskElasticNet<F> {

From 80d9a023e5e86eaede3891c29f15633f283685c0 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sat, 22 Jan 2022 16:20:55 +0100
Subject: [PATCH 14/28] added tests for MTL

---
 algorithms/linfa-elasticnet/src/algorithm.rs  | 103 +++++++++++++++++-
 .../linfa-elasticnet/src/hyperparams.rs       |  73 +++++++++++++
 algorithms/linfa-elasticnet/src/lib.rs        |  25 ++---
 3 files changed, 185 insertions(+), 16 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index c6b136d00..2a45195e7 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -152,6 +152,28 @@ impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array1<F>> f
     }
 }
 
+impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array2<F>>
+    for MultiTaskElasticNet<F>
+{
+    /// Given an input matrix `X`, with shape `(n_samples, n_features)`,
+    /// `predict` returns the target variable according to elastic net
+    /// learned from the training data distribution.
+    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array2<F>) {
+        assert_eq!(
+            x.nrows(),
+            y.nrows(),
+            "The number of data points must match the number of output targets."
+        );
+
+        general_mat_mul(F::one(), &x, &self.hyperplane, F::one(), y);
+        // *y = *y + self.intercept;
+    }
+
+    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array2<F> {
+        Array2::zeros((x.nrows(), x.nrows()))
+    }
+}
+
 /// View the fitted parameters and make predictions with a fitted
 /// elastic net model
 impl<F: Float> ElasticNet<F> {
@@ -546,7 +568,7 @@ pub fn compute_intercept<F: Float>(
 
 #[cfg(test)]
 mod tests {
-    use super::{block_coordinate_descent, coordinate_descent, ElasticNet};
+    use super::{block_coordinate_descent, coordinate_descent, ElasticNet, MultiTaskElasticNet};
     use approx::assert_abs_diff_eq;
     use ndarray::linalg::general_mat_mul;
     use ndarray::{array, s, Array, Array1, Array2, Axis};
@@ -744,6 +766,20 @@ mod tests {
         assert_abs_diff_eq!(model.hyperplane(), &array![0.]);
     }
 
+    #[test]
+    fn mtl_lasso_zero_works() {
+        let dataset = Dataset::from((array![[0.], [0.], [0.]], array![[0.], [0.], [0.]]));
+
+        let model = MultiTaskElasticNet::params()
+            .l1_ratio(1.0)
+            .penalty(0.1)
+            .fit(&dataset)
+            .unwrap();
+
+        assert_abs_diff_eq!(model.intercept(), &array![0.]);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0.]]);
+    }
+
     #[test]
     fn lasso_toy_example_works() {
         // Test Lasso on a toy example for various values of alpha.
@@ -778,6 +814,71 @@ mod tests {
         assert_abs_diff_eq!(model.duality_gap(), 0.0);
     }
 
+    #[test]
+    fn multitask_lasso_toy_example_works() {
+        // Test MultiTaskLasso on a toy example for various values of alpha.
+        // When validating this against sklearn notice that sklearn divides it
+        // against n_samples.
+        let dataset = Dataset::new(
+            array![[-1.0], [0.0], [1.0]],
+            array![[-1.0, 1.0], [0.0, -1.5], [1.0, 1.3]],
+        );
+
+        // input for prediction
+        let t = array![[2.0], [3.0], [4.0]];
+        let model = MultiTaskElasticNet::lasso()
+            .penalty(1e-8)
+            .fit(&dataset)
+            .unwrap();
+        assert_abs_diff_eq!(model.intercept(), &array![0., -1.5]);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [2.65]], epsilon = 1e-6);
+        assert_abs_diff_eq!(
+            model.predict(&t),
+            array![[0., 3.79999998], [0., 6.44999996], [0., 9.09999995]],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(model.duality_gap(), 0.0);
+
+        let model = MultiTaskElasticNet::lasso()
+            .penalty(0.1)
+            .fit(&dataset)
+            .unwrap();
+        assert_abs_diff_eq!(model.intercept(), &array![0., -1.4]);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [2.5]], epsilon = 1e-6);
+        assert_abs_diff_eq!(
+            model.predict(&t),
+            &array![[0., 3.6], [0., 6.1], [0., 8.6]],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(model.duality_gap(), 0.0);
+
+        let model = MultiTaskElasticNet::lasso()
+            .penalty(0.5)
+            .fit(&dataset)
+            .unwrap();
+        assert_abs_diff_eq!(model.intercept(), &array![0., -1.]);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [1.9]], epsilon = 1e-6);
+        assert_abs_diff_eq!(
+            model.predict(&t),
+            &array![[0., 2.8], [0., 4.7], [0., 6.6]],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(model.duality_gap(), 0.0);
+
+        let model = MultiTaskElasticNet::lasso()
+            .penalty(1.0)
+            .fit(&dataset)
+            .unwrap();
+        assert_abs_diff_eq!(model.intercept(), &array![0., -0.5]);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0.0], [1.15]], epsilon = 1e-6);
+        assert_abs_diff_eq!(
+            model.predict(&t),
+            &array![[0., 1.8], [0., 2.95], [0., 4.1]],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(model.duality_gap(), 0.0);
+    }
+
     #[test]
     fn elastic_net_toy_example_works() {
         let dataset = Dataset::new(array![[-1.0], [0.0], [1.0]], array![-1.0, 0.0, 1.0]);
diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs
index 661c0edf1..1649ed843 100644
--- a/algorithms/linfa-elasticnet/src/hyperparams.rs
+++ b/algorithms/linfa-elasticnet/src/hyperparams.rs
@@ -143,6 +143,7 @@ impl<F: Float> MultiTaskElasticNetValidParams<F> {
 /// # Ok::<(), ElasticNetError>(())
 /// ```
 pub struct ElasticNetParams<F>(ElasticNetValidParams<F>);
+pub struct MultiTaskElasticNetParams<F>(MultiTaskElasticNetValidParams<F>);
 
 impl<F: Float> Default for ElasticNetParams<F> {
     fn default() -> Self {
@@ -150,6 +151,12 @@ impl<F: Float> Default for ElasticNetParams<F> {
     }
 }
 
+impl<F: Float> Default for MultiTaskElasticNetParams<F> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 /// Configure and fit a Elastic Net model
 impl<F: Float> ElasticNetParams<F> {
     /// Create default elastic net hyper parameters
@@ -216,6 +223,43 @@ impl<F: Float> ElasticNetParams<F> {
     }
 }
 
+impl<F: Float> MultiTaskElasticNetParams<F> {
+    pub fn new() -> MultiTaskElasticNetParams<F> {
+        Self(MultiTaskElasticNetValidParams {
+            penalty: F::one(),
+            l1_ratio: F::cast(0.5),
+            with_intercept: true,
+            max_iterations: 1000,
+            tolerance: F::cast(1e-4),
+        })
+    }
+
+    pub fn penalty(mut self, penalty: F) -> Self {
+        self.0.penalty = penalty;
+        self
+    }
+
+    pub fn l1_ratio(mut self, l1_ratio: F) -> Self {
+        self.0.l1_ratio = l1_ratio;
+        self
+    }
+
+    pub fn with_intercept(mut self, with_intercept: bool) -> Self {
+        self.0.with_intercept = with_intercept;
+        self
+    }
+
+    pub fn tolerance(mut self, tolerance: F) -> Self {
+        self.0.tolerance = tolerance;
+        self
+    }
+
+    pub fn max_iterations(mut self, max_iterations: u32) -> Self {
+        self.0.max_iterations = max_iterations;
+        self
+    }
+}
+
 impl<F: Float> ParamGuard for ElasticNetParams<F> {
     type Checked = ElasticNetValidParams<F>;
     type Error = ElasticNetError;
@@ -244,3 +288,32 @@ impl<F: Float> ParamGuard for ElasticNetParams<F> {
         Ok(self.0)
     }
 }
+
+impl<F: Float> ParamGuard for MultiTaskElasticNetParams<F> {
+    type Checked = MultiTaskElasticNetValidParams<F>;
+    type Error = ElasticNetError;
+
+    /// Validate the hyper parameters
+    fn check_ref(&self) -> Result<&Self::Checked> {
+        if self.0.penalty.is_negative() {
+            Err(ElasticNetError::InvalidPenalty(
+                self.0.penalty.to_f32().unwrap(),
+            ))
+        } else if !(F::zero()..=F::one()).contains(&self.0.l1_ratio) {
+            Err(ElasticNetError::InvalidL1Ratio(
+                self.0.l1_ratio.to_f32().unwrap(),
+            ))
+        } else if self.0.tolerance.is_negative() {
+            Err(ElasticNetError::InvalidTolerance(
+                self.0.tolerance.to_f32().unwrap(),
+            ))
+        } else {
+            Ok(&self.0)
+        }
+    }
+
+    fn check(self) -> Result<Self::Checked> {
+        self.check_ref()?;
+        Ok(self.0)
+    }
+}
diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs
index 04203000e..3ca803499 100644
--- a/algorithms/linfa-elasticnet/src/lib.rs
+++ b/algorithms/linfa-elasticnet/src/lib.rs
@@ -11,7 +11,10 @@ mod error;
 mod hyperparams;
 
 pub use error::{ElasticNetError, Result};
-pub use hyperparams::{ElasticNetParams, ElasticNetValidParams};
+pub use hyperparams::{
+    ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams,
+    MultiTaskElasticNetValidParams,
+};
 
 #[cfg_attr(
     feature = "serde",
@@ -87,25 +90,17 @@ pub struct MultiTaskElasticNet<F> {
 }
 
 impl<F: Float> MultiTaskElasticNet<F> {
-    /// Create a default parameter set for construction of MultiTaskElasticNet model
-    ///
-    /// By default, an intercept will be fitted. To disable fitting an
-    /// intercept, call `.with_intercept(false)` before calling `.fit()`.
-    ///
-    /// To additionally normalize the feature matrix before fitting, call
-    /// `fit_intercept_and_normalize()` before calling `fit()`. The feature
-    /// matrix will not be normalized by default.
-    pub fn params() -> ElasticNetParams<F> {
-        ElasticNetParams::new()
+    pub fn params() -> MultiTaskElasticNetParams<F> {
+        MultiTaskElasticNetParams::new()
     }
 
     /// Create a multi-task ridge only model
-    pub fn ridge() -> ElasticNetParams<F> {
-        ElasticNetParams::new().l1_ratio(F::zero())
+    pub fn ridge() -> MultiTaskElasticNetParams<F> {
+        MultiTaskElasticNetParams::new().l1_ratio(F::zero())
     }
 
     /// Create a multi-task Lasso only model
-    pub fn lasso() -> ElasticNetParams<F> {
-        ElasticNetParams::new().l1_ratio(F::one())
+    pub fn lasso() -> MultiTaskElasticNetParams<F> {
+        MultiTaskElasticNetParams::new().l1_ratio(F::one())
     }
 }

From 3d981ef42a574d0d667d00373878d6e7356add38 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sun, 23 Jan 2022 19:08:42 +0100
Subject: [PATCH 15/28] pass comments

---
 algorithms/linfa-elasticnet/src/algorithm.rs  | 54 +++++++------------
 .../linfa-elasticnet/src/hyperparams.rs       | 30 -----------
 algorithms/linfa-elasticnet/src/lib.rs        | 12 +++--
 src/dataset/impl_targets.rs                   | 14 +----
 src/dataset/mod.rs                            |  8 ---
 5 files changed, 28 insertions(+), 90 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 2a45195e7..3d1a31e37 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -1,5 +1,4 @@
 use approx::{abs_diff_eq, abs_diff_ne};
-use ndarray::linalg::general_mat_mul;
 use ndarray::{
     s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2,
 };
@@ -7,7 +6,7 @@ use ndarray_linalg::{Inverse, Lapack};
 
 use linfa::traits::{Fit, PredictInplace};
 use linfa::{
-    dataset::{AsTargets, MultiTaskTarget, Records},
+    dataset::{AsTargets, Records},
     DatasetBase, Float,
 };
 
@@ -16,11 +15,10 @@ use super::{
     ElasticNet, ElasticNetError, MultiTaskElasticNet, Result,
 };
 
-impl<F, D, T> Fit<ArrayBase<D, Ix2>, T, ElasticNetError> for ElasticNetValidParams<F>
+impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>, ElasticNetError> for ElasticNetValidParams<F>
 where
     F: Float + Lapack,
     D: Data<Elem = F>,
-    T: AsTargets<Elem = F>,
 {
     type Object = ElasticNet<F>;
 
@@ -34,7 +32,10 @@ where
     /// Returns a `FittedElasticNet` object which contains the fitted
     /// parameters and can be used to `predict` values of the target variable
     /// for new feature values.
-    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object> {
+    fn fit(
+        &self,
+        dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>>,
+    ) -> Result<Self::Object> {
         let target = dataset.try_single_target()?;
 
         let (intercept, y) = compute_intercept(self.with_intercept(), target);
@@ -62,8 +63,7 @@ where
     }
 }
 
-impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>, ElasticNetError>
-    for MultiTaskElasticNetValidParams<F>
+impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>, ElasticNetError> for ElasticNetValidParams<F>
 where
     F: Float + Lapack,
     D: Data<Elem = F>,
@@ -107,19 +107,9 @@ where
             self.penalty(),
         );
 
-        let mut y_est = Array2::<F>::zeros((dataset.nsamples(), dataset.ntargets()));
-        general_mat_mul(
-            F::one(),
-            &dataset.records(),
-            &hyperplane,
-            F::one(),
-            &mut y_est,
-        );
-        // Adding intercept
-        for i in 0..dataset.nsamples() {
-            let _res = &y_est.slice(s![i, ..]) + &intercept;
-            y_est.slice_mut(s![i, ..]).assign(&_res);
-        }
+        let y_est = dataset.records().dot(&hyperplane);
+
+        y_est = y_est + &intercept;
 
         let variance = variance_params_multi_task(dataset, y_est);
 
@@ -165,11 +155,11 @@ impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array2<F>>
             "The number of data points must match the number of output targets."
         );
 
-        general_mat_mul(F::one(), &x, &self.hyperplane, F::one(), y);
-        // *y = *y + self.intercept;
+        let y = x.dot(&self.hyperplane) + &self.intercept;
     }
 
     fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array2<F> {
+        // TODO: fix, should be (x.nrows(), y.ncols())
         Array2::zeros((x.nrows(), x.nrows()))
     }
 }
@@ -315,9 +305,10 @@ fn coordinate_descent<'a, F: Float>(
             let old_w_j = w[j];
             let x_j: ArrayView1<F> = x.slice(s![.., j]);
             if abs_diff_ne!(old_w_j, F::zero()) {
-                for i in 0..x.shape()[0] {
-                    r[i] += x_j[i] * old_w_j;
-                }
+                // for i in 0..x.shape()[0] {
+                //     r[i] += x_j[i] * old_w_j;
+                // }
+                r.scaled_add(old_w_j, &x_j);
             }
             let tmp: F = x_j.dot(&r);
             w[j] = tmp.signum() * F::max(tmp.abs() - n_samples * l1_ratio * penalty, F::zero())
@@ -420,9 +411,7 @@ fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) ->
     let mut _res = Array1::<F>::zeros(x.len());
     if norm_x >= threshold {
         let scal = F::one() - threshold / norm_x;
-        for i in 0..x.len() {
-            _res[i] = scal * x[i];
-        }
+        _res = x * scal;
     }
     _res
 }
@@ -471,9 +460,7 @@ fn duality_gap_mtl<'a, F: Float>(
     let n_tasks = y.shape()[1];
     let l1_reg = l1_ratio * penalty * n_samples;
     let l2_reg = (F::one() - l1_ratio) * penalty * n_samples;
-    let mut xta = Array2::<F>::zeros((n_features, n_tasks));
-    general_mat_mul(F::one(), &x.t(), &r, F::one(), &mut xta);
-    xta = xta - &w * l2_reg;
+    let xta = x.t().dot(&r) - &w * l2_reg;
 
     let dual_norm_xta = xta
         .map_axis(Axis(1), |x| x.dot(&x).sqrt())
@@ -488,7 +475,7 @@ fn duality_gap_mtl<'a, F: Float>(
         (F::one(), r_norm2)
     };
     let mut rty = Array2::<F>::zeros((n_tasks, n_tasks));
-    general_mat_mul(F::one(), &r.t(), &y, F::one(), &mut rty);
+    let rty = r.t().dot(&y);
     let trace_rty = rty.diag().sum();
     let l21_norm = w.map_axis(Axis(1), |wj| (wj.dot(&wj)).sqrt()).sum();
     gap += l1_reg * l21_norm - const_ * trace_rty
@@ -570,7 +557,6 @@ pub fn compute_intercept<F: Float>(
 mod tests {
     use super::{block_coordinate_descent, coordinate_descent, ElasticNet, MultiTaskElasticNet};
     use approx::assert_abs_diff_eq;
-    use ndarray::linalg::general_mat_mul;
     use ndarray::{array, s, Array, Array1, Array2, Axis};
     use ndarray_rand::rand::SeedableRng;
     use ndarray_rand::rand_distr::Uniform;
@@ -624,7 +610,7 @@ mod tests {
         beta: &Array2<f64>,
     ) -> f64 {
         let mut resid = Array2::<f64>::zeros((x.shape()[0], y.shape()[1]));
-        general_mat_mul(1., &x, &beta, 1., &mut resid);
+        let resid = x.dot(&beta);
         resid = &resid * -1.;
         for i in 0..resid.shape()[0] {
             let _res = &resid.slice(s![i, ..]) - intercept;
diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs
index 1649ed843..236fc596e 100644
--- a/algorithms/linfa-elasticnet/src/hyperparams.rs
+++ b/algorithms/linfa-elasticnet/src/hyperparams.rs
@@ -23,14 +23,6 @@ pub struct ElasticNetValidParams<F> {
     tolerance: F,
 }
 
-pub struct MultiTaskElasticNetValidParams<F> {
-    penalty: F,
-    l1_ratio: F,
-    with_intercept: bool,
-    max_iterations: u32,
-    tolerance: F,
-}
-
 impl<F: Float> ElasticNetValidParams<F> {
     pub fn penalty(&self) -> F {
         self.penalty
@@ -53,28 +45,6 @@ impl<F: Float> ElasticNetValidParams<F> {
     }
 }
 
-impl<F: Float> MultiTaskElasticNetValidParams<F> {
-    pub fn penalty(&self) -> F {
-        self.penalty
-    }
-
-    pub fn l1_ratio(&self) -> F {
-        self.l1_ratio
-    }
-
-    pub fn with_intercept(&self) -> bool {
-        self.with_intercept
-    }
-
-    pub fn max_iterations(&self) -> u32 {
-        self.max_iterations
-    }
-
-    pub fn tolerance(&self) -> F {
-        self.tolerance
-    }
-}
-
 /// A hyper-parameter set during construction
 ///
 /// Configures and minimizes the following objective function:
diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs
index 3ca803499..d2865093a 100644
--- a/algorithms/linfa-elasticnet/src/lib.rs
+++ b/algorithms/linfa-elasticnet/src/lib.rs
@@ -11,14 +11,11 @@ mod error;
 mod hyperparams;
 
 pub use error::{ElasticNetError, Result};
-pub use hyperparams::{
-    ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams,
-    MultiTaskElasticNetValidParams,
-};
+pub use hyperparams::{ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams};
 
 #[cfg_attr(
     feature = "serde",
-    derive(Serialize, Deserialize),
+    derive(Serialize, Deserialize, Debug, Clone, PartialEq),
     serde(crate = "serde_crate")
 )]
 /// Elastic Net model
@@ -69,6 +66,11 @@ impl<F: Float> ElasticNet<F> {
     }
 }
 
+#[cfg_attr(
+    feature = "serde",
+    derive(Serialize, Deserialize, Debug, Clone, PartialEq),
+    serde(crate = "serde_crate")
+)]
 /// MultiTask Elastic Net model
 ///
 /// This struct contains the parameters of a fitted multi-task elastic net model. This includes the
diff --git a/src/dataset/impl_targets.rs b/src/dataset/impl_targets.rs
index 6949acbd0..a72170e46 100644
--- a/src/dataset/impl_targets.rs
+++ b/src/dataset/impl_targets.rs
@@ -2,7 +2,7 @@ use std::collections::HashMap;
 
 use super::{
     AsProbabilities, AsTargets, AsTargetsMut, CountedTargets, DatasetBase, FromTargetArray, Label,
-    Labels, MultiTaskTarget, Pr, Records,
+    Labels, Pr, Records,
 };
 use ndarray::{
     Array1, Array2, ArrayBase, ArrayView2, ArrayViewMut2, Axis, CowArray, Data, DataMut, Dimension,
@@ -17,18 +17,6 @@ impl<'a, L, S: Data<Elem = L>> AsTargets for ArrayBase<S, Ix1> {
     }
 }
 
-impl<'a, L, S: Data<Elem = L>> MultiTaskTarget for ArrayBase<S, Ix2> {
-    type Elem = L;
-
-    fn nsamples(&self) -> usize {
-        self.shape()[0]
-    }
-
-    fn ntasks(&self) -> usize {
-        self.shape()[1]
-    }
-}
-
 impl<'a, L: Clone + 'a, S: Data<Elem = L>> FromTargetArray<'a, L> for ArrayBase<S, Ix2> {
     type Owned = ArrayBase<OwnedRepr<L>, Ix2>;
     type View = ArrayBase<ViewRepr<&'a L>, Ix2>;
diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs
index f21a45b95..dc19a3cc0 100644
--- a/src/dataset/mod.rs
+++ b/src/dataset/mod.rs
@@ -198,14 +198,6 @@ pub trait Records: Sized {
     fn nfeatures(&self) -> usize;
 }
 
-/// MultiTask trait
-pub trait MultiTaskTarget: Sized {
-    type Elem;
-
-    fn nsamples(&self) -> usize;
-    fn ntasks(&self) -> usize;
-}
-
 /// Return a reference to single or multiple target variables
 pub trait AsTargets {
     type Elem;

From 36f4b70545dceefeb50131fe2b2128ca452ae35b Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sun, 23 Jan 2022 19:57:26 +0100
Subject: [PATCH 16/28] CLN files

---
 algorithms/linfa-elasticnet/src/algorithm.rs  | 52 ++-----------
 .../linfa-elasticnet/src/hyperparams.rs       | 73 -------------------
 algorithms/linfa-elasticnet/src/lib.rs        | 14 ++--
 3 files changed, 15 insertions(+), 124 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 3d1a31e37..868415cf5 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -11,8 +11,7 @@ use linfa::{
 };
 
 use super::{
-    hyperparams::{ElasticNetValidParams, MultiTaskElasticNetValidParams},
-    ElasticNet, ElasticNetError, MultiTaskElasticNet, Result,
+    hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, MultiTaskElasticNet, Result,
 };
 
 impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>, ElasticNetError> for ElasticNetValidParams<F>
@@ -86,12 +85,12 @@ where
     ) -> Result<Self::Object> {
         let target = dataset.targets();
         let nsamples = dataset.nsamples();
-        let ntasks = target.ntasks();
+        let ntasks = dataset.targets().ncols();
 
         let mut intercept = Array1::<F>::zeros(ntasks);
         let mut y = Array2::<F>::zeros((nsamples, ntasks));
 
-        for t in 0..target.ntasks() {
+        for t in 0..ntasks {
             let (intercept_t, y_t) =
                 compute_intercept(self.with_intercept(), target.slice(s![.., t]));
             intercept[t] = intercept_t;
@@ -107,11 +106,9 @@ where
             self.penalty(),
         );
 
-        let y_est = dataset.records().dot(&hyperplane);
-
-        y_est = y_est + &intercept;
-
-        let variance = variance_params_multi_task(dataset, y_est);
+        // let y_est = dataset.records().dot(&hyperplane) + &intercept;
+        // let variance = variance_params_multi_task(dataset, y_est);
+        let variance = Ok(Array1::<F>::zeros(dataset.nfeatures()));
 
         Ok(MultiTaskElasticNet {
             hyperplane,
@@ -411,7 +408,7 @@ fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) ->
     let mut _res = Array1::<F>::zeros(x.len());
     if norm_x >= threshold {
         let scal = F::one() - threshold / norm_x;
-        _res = x * scal;
+        _res = &x * scal;
     }
     _res
 }
@@ -456,8 +453,6 @@ fn duality_gap_mtl<'a, F: Float>(
 ) -> F {
     let half = F::cast(0.5);
     let n_samples = F::cast(x.shape()[0]);
-    let n_features = x.shape()[1];
-    let n_tasks = y.shape()[1];
     let l1_reg = l1_ratio * penalty * n_samples;
     let l2_reg = (F::one() - l1_ratio) * penalty * n_samples;
     let xta = x.t().dot(&r) - &w * l2_reg;
@@ -474,7 +469,6 @@ fn duality_gap_mtl<'a, F: Float>(
     } else {
         (F::one(), r_norm2)
     };
-    let mut rty = Array2::<F>::zeros((n_tasks, n_tasks));
     let rty = r.t().dot(&y);
     let trace_rty = rty.diag().sum();
     let l21_norm = w.map_axis(Axis(1), |wj| (wj.dot(&wj)).sqrt()).sum();
@@ -508,36 +502,6 @@ fn variance_params<F: Float + Lapack, T: AsTargets<Elem = F>, D: Data<Elem = F>>
     }
 }
 
-fn variance_params_multi_task<F, T, D>(
-    ds: &DatasetBase<ArrayBase<D, Ix2>, T>,
-    y_est: Array2<F>,
-) -> Result<Array1<F>>
-where
-    F: Float + Lapack,
-    T: AsTargets<Elem = F> + MultiTaskTarget<Elem = F>,
-    D: Data<Elem = F>,
-{
-    let nfeatures = ds.nfeatures();
-    let nsamples = ds.nsamples();
-    let ntasks = ds.targets().ntasks();
-
-    let target = ds.targets().as_multi_targets();
-
-    if nsamples < nfeatures + 1 {
-        return Err(ElasticNetError::NotEnoughSamples);
-    }
-
-    let var_target =
-        (&target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures));
-
-    let inv_cov = ds.records().t().dot(ds.records()).inv();
-
-    match inv_cov {
-        Ok(inv_cov) => Ok(inv_cov.diag().mapv(|x| var_target * x)),
-        Err(_) => Err(ElasticNetError::IllConditioned),
-    }
-}
-
 /// Compute the intercept as the mean of `y` and center `y` if an intercept should
 /// be used, use `0.0` as intercept and leave `y` unchanged otherwise.
 pub fn compute_intercept<F: Float>(
@@ -610,7 +574,7 @@ mod tests {
         beta: &Array2<f64>,
     ) -> f64 {
         let mut resid = Array2::<f64>::zeros((x.shape()[0], y.shape()[1]));
-        let resid = x.dot(&beta);
+        let resid = x.dot(beta);
         resid = &resid * -1.;
         for i in 0..resid.shape()[0] {
             let _res = &resid.slice(s![i, ..]) - intercept;
diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs
index 236fc596e..f8dc9a96d 100644
--- a/algorithms/linfa-elasticnet/src/hyperparams.rs
+++ b/algorithms/linfa-elasticnet/src/hyperparams.rs
@@ -113,7 +113,6 @@ impl<F: Float> ElasticNetValidParams<F> {
 /// # Ok::<(), ElasticNetError>(())
 /// ```
 pub struct ElasticNetParams<F>(ElasticNetValidParams<F>);
-pub struct MultiTaskElasticNetParams<F>(MultiTaskElasticNetValidParams<F>);
 
 impl<F: Float> Default for ElasticNetParams<F> {
     fn default() -> Self {
@@ -121,12 +120,6 @@ impl<F: Float> Default for ElasticNetParams<F> {
     }
 }
 
-impl<F: Float> Default for MultiTaskElasticNetParams<F> {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 /// Configure and fit a Elastic Net model
 impl<F: Float> ElasticNetParams<F> {
     /// Create default elastic net hyper parameters
@@ -193,43 +186,6 @@ impl<F: Float> ElasticNetParams<F> {
     }
 }
 
-impl<F: Float> MultiTaskElasticNetParams<F> {
-    pub fn new() -> MultiTaskElasticNetParams<F> {
-        Self(MultiTaskElasticNetValidParams {
-            penalty: F::one(),
-            l1_ratio: F::cast(0.5),
-            with_intercept: true,
-            max_iterations: 1000,
-            tolerance: F::cast(1e-4),
-        })
-    }
-
-    pub fn penalty(mut self, penalty: F) -> Self {
-        self.0.penalty = penalty;
-        self
-    }
-
-    pub fn l1_ratio(mut self, l1_ratio: F) -> Self {
-        self.0.l1_ratio = l1_ratio;
-        self
-    }
-
-    pub fn with_intercept(mut self, with_intercept: bool) -> Self {
-        self.0.with_intercept = with_intercept;
-        self
-    }
-
-    pub fn tolerance(mut self, tolerance: F) -> Self {
-        self.0.tolerance = tolerance;
-        self
-    }
-
-    pub fn max_iterations(mut self, max_iterations: u32) -> Self {
-        self.0.max_iterations = max_iterations;
-        self
-    }
-}
-
 impl<F: Float> ParamGuard for ElasticNetParams<F> {
     type Checked = ElasticNetValidParams<F>;
     type Error = ElasticNetError;
@@ -258,32 +214,3 @@ impl<F: Float> ParamGuard for ElasticNetParams<F> {
         Ok(self.0)
     }
 }
-
-impl<F: Float> ParamGuard for MultiTaskElasticNetParams<F> {
-    type Checked = MultiTaskElasticNetValidParams<F>;
-    type Error = ElasticNetError;
-
-    /// Validate the hyper parameters
-    fn check_ref(&self) -> Result<&Self::Checked> {
-        if self.0.penalty.is_negative() {
-            Err(ElasticNetError::InvalidPenalty(
-                self.0.penalty.to_f32().unwrap(),
-            ))
-        } else if !(F::zero()..=F::one()).contains(&self.0.l1_ratio) {
-            Err(ElasticNetError::InvalidL1Ratio(
-                self.0.l1_ratio.to_f32().unwrap(),
-            ))
-        } else if self.0.tolerance.is_negative() {
-            Err(ElasticNetError::InvalidTolerance(
-                self.0.tolerance.to_f32().unwrap(),
-            ))
-        } else {
-            Ok(&self.0)
-        }
-    }
-
-    fn check(self) -> Result<Self::Checked> {
-        self.check_ref()?;
-        Ok(self.0)
-    }
-}
diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs
index d2865093a..1ecf13de0 100644
--- a/algorithms/linfa-elasticnet/src/lib.rs
+++ b/algorithms/linfa-elasticnet/src/lib.rs
@@ -11,7 +11,7 @@ mod error;
 mod hyperparams;
 
 pub use error::{ElasticNetError, Result};
-pub use hyperparams::{ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams};
+pub use hyperparams::{ElasticNetParams, ElasticNetValidParams};
 
 #[cfg_attr(
     feature = "serde",
@@ -92,17 +92,17 @@ pub struct MultiTaskElasticNet<F> {
 }
 
 impl<F: Float> MultiTaskElasticNet<F> {
-    pub fn params() -> MultiTaskElasticNetParams<F> {
-        MultiTaskElasticNetParams::new()
+    pub fn params() -> ElasticNetParams<F> {
+        ElasticNetParams::new()
     }
 
     /// Create a multi-task ridge only model
-    pub fn ridge() -> MultiTaskElasticNetParams<F> {
-        MultiTaskElasticNetParams::new().l1_ratio(F::zero())
+    pub fn ridge() -> ElasticNetParams<F> {
+        ElasticNetParams::new().l1_ratio(F::zero())
     }
 
     /// Create a multi-task Lasso only model
-    pub fn lasso() -> MultiTaskElasticNetParams<F> {
-        MultiTaskElasticNetParams::new().l1_ratio(F::one())
+    pub fn lasso() -> ElasticNetParams<F> {
+        ElasticNetParams::new().l1_ratio(F::one())
     }
 }

From 15b280b718e81e995ee02bbdf78557e77e3304ef Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sun, 23 Jan 2022 21:39:46 +0100
Subject: [PATCH 17/28] cleaner implementation

---
 algorithms/linfa-elasticnet/src/algorithm.rs  | 30 +++++++++----------
 .../linfa-elasticnet/src/hyperparams.rs       | 26 ++++++++++------
 algorithms/linfa-elasticnet/src/lib.rs        | 14 ++++-----
 3 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 868415cf5..84eadbbdd 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -11,13 +11,15 @@ use linfa::{
 };
 
 use super::{
-    hyperparams::ElasticNetValidParams, ElasticNet, ElasticNetError, MultiTaskElasticNet, Result,
+    hyperparams::{ElasticNetValidParams, MultiTaskElasticNetValidParams},
+    ElasticNet, ElasticNetError, MultiTaskElasticNet, Result,
 };
 
-impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>, ElasticNetError> for ElasticNetValidParams<F>
+impl<F, D, T> Fit<ArrayBase<D, Ix2>, T, ElasticNetError> for ElasticNetValidParams<F>
 where
     F: Float + Lapack,
     D: Data<Elem = F>,
+    T: AsTargets<Elem = F>,
 {
     type Object = ElasticNet<F>;
 
@@ -31,10 +33,7 @@ where
     /// Returns a `FittedElasticNet` object which contains the fitted
     /// parameters and can be used to `predict` values of the target variable
     /// for new feature values.
-    fn fit(
-        &self,
-        dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>>,
-    ) -> Result<Self::Object> {
+    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object> {
         let target = dataset.try_single_target()?;
 
         let (intercept, y) = compute_intercept(self.with_intercept(), target);
@@ -62,7 +61,8 @@ where
     }
 }
 
-impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>, ElasticNetError> for ElasticNetValidParams<F>
+impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>, ElasticNetError>
+    for MultiTaskElasticNetValidParams<F>
 where
     F: Float + Lapack,
     D: Data<Elem = F>,
@@ -152,7 +152,7 @@ impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array2<F>>
             "The number of data points must match the number of output targets."
         );
 
-        let y = x.dot(&self.hyperplane) + &self.intercept;
+        *y = x.dot(&self.hyperplane) + &self.intercept;
     }
 
     fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array2<F> {
@@ -573,14 +573,9 @@ mod tests {
         intercept: &Array1<f64>,
         beta: &Array2<f64>,
     ) -> f64 {
-        let mut resid = Array2::<f64>::zeros((x.shape()[0], y.shape()[1]));
-        let resid = x.dot(beta);
+        let mut resid = x.dot(beta);
         resid = &resid * -1.;
-        for i in 0..resid.shape()[0] {
-            let _res = &resid.slice(s![i, ..]) - intercept;
-            resid.slice_mut(s![i, ..]).assign(&_res);
-        }
-        resid = resid + y;
+        resid = &resid - intercept + y;
         let mut datafit = resid.map(|r| r.powi(2)).sum();
         datafit /= 2.0 * x.shape()[0] as f64;
         datafit
@@ -704,7 +699,10 @@ mod tests {
 
     #[test]
     fn lasso_zero_works() {
-        let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.]));
+        // let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.]));
+        let x = array![[0.], [0.], [0.]];
+        let y = array![0., 0., 0.];
+        let dataset = Dataset::from((x, y));
 
         let model = ElasticNet::params()
             .l1_ratio(1.0)
diff --git a/algorithms/linfa-elasticnet/src/hyperparams.rs b/algorithms/linfa-elasticnet/src/hyperparams.rs
index f8dc9a96d..abd0f8e3c 100644
--- a/algorithms/linfa-elasticnet/src/hyperparams.rs
+++ b/algorithms/linfa-elasticnet/src/hyperparams.rs
@@ -15,7 +15,7 @@ use super::Result;
 /// A verified hyper-parameter set ready for the estimation of a ElasticNet regression model
 ///
 /// See [`ElasticNetParams`](crate::ElasticNetParams) for more informations.
-pub struct ElasticNetValidParams<F> {
+pub struct ElasticNetValidParamsBase<F, const MULTI_TASK: bool> {
     penalty: F,
     l1_ratio: F,
     with_intercept: bool,
@@ -23,7 +23,10 @@ pub struct ElasticNetValidParams<F> {
     tolerance: F,
 }
 
-impl<F: Float> ElasticNetValidParams<F> {
+pub type ElasticNetValidParams<F> = ElasticNetValidParamsBase<F, false>;
+pub type MultiTaskElasticNetValidParams<F> = ElasticNetValidParamsBase<F, true>;
+
+impl<F: Float, const MULTI_TASK: bool> ElasticNetValidParamsBase<F, MULTI_TASK> {
     pub fn penalty(&self) -> F {
         self.penalty
     }
@@ -112,16 +115,21 @@ impl<F: Float> ElasticNetValidParams<F> {
 /// let model = checked_params.fit(&ds)?;
 /// # Ok::<(), ElasticNetError>(())
 /// ```
-pub struct ElasticNetParams<F>(ElasticNetValidParams<F>);
+pub struct ElasticNetParamsBase<F, const MULTI_TASK: bool>(
+    ElasticNetValidParamsBase<F, MULTI_TASK>,
+);
+
+pub type ElasticNetParams<F> = ElasticNetParamsBase<F, false>;
+pub type MultiTaskElasticNetParams<F> = ElasticNetParamsBase<F, true>;
 
-impl<F: Float> Default for ElasticNetParams<F> {
+impl<F: Float, const MULTI_TASK: bool> Default for ElasticNetParamsBase<F, MULTI_TASK> {
     fn default() -> Self {
         Self::new()
     }
 }
 
 /// Configure and fit a Elastic Net model
-impl<F: Float> ElasticNetParams<F> {
+impl<F: Float, const MULTI_TASK: bool> ElasticNetParamsBase<F, MULTI_TASK> {
     /// Create default elastic net hyper parameters
     ///
     /// By default, an intercept will be fitted. To disable fitting an
@@ -130,8 +138,8 @@ impl<F: Float> ElasticNetParams<F> {
     /// To additionally normalize the feature matrix before fitting, call
     /// `fit_intercept_and_normalize()` before calling `fit()`. The feature
     /// matrix will not be normalized by default.
-    pub fn new() -> ElasticNetParams<F> {
-        Self(ElasticNetValidParams {
+    pub fn new() -> ElasticNetParamsBase<F, MULTI_TASK> {
+        Self(ElasticNetValidParamsBase {
             penalty: F::one(),
             l1_ratio: F::cast(0.5),
             with_intercept: true,
@@ -186,8 +194,8 @@ impl<F: Float> ElasticNetParams<F> {
     }
 }
 
-impl<F: Float> ParamGuard for ElasticNetParams<F> {
-    type Checked = ElasticNetValidParams<F>;
+impl<F: Float, const MULTI_TASK: bool> ParamGuard for ElasticNetParamsBase<F, MULTI_TASK> {
+    type Checked = ElasticNetValidParamsBase<F, MULTI_TASK>;
     type Error = ElasticNetError;
 
     /// Validate the hyper parameters
diff --git a/algorithms/linfa-elasticnet/src/lib.rs b/algorithms/linfa-elasticnet/src/lib.rs
index 1ecf13de0..d2865093a 100644
--- a/algorithms/linfa-elasticnet/src/lib.rs
+++ b/algorithms/linfa-elasticnet/src/lib.rs
@@ -11,7 +11,7 @@ mod error;
 mod hyperparams;
 
 pub use error::{ElasticNetError, Result};
-pub use hyperparams::{ElasticNetParams, ElasticNetValidParams};
+pub use hyperparams::{ElasticNetParams, ElasticNetValidParams, MultiTaskElasticNetParams};
 
 #[cfg_attr(
     feature = "serde",
@@ -92,17 +92,17 @@ pub struct MultiTaskElasticNet<F> {
 }
 
 impl<F: Float> MultiTaskElasticNet<F> {
-    pub fn params() -> ElasticNetParams<F> {
-        ElasticNetParams::new()
+    pub fn params() -> MultiTaskElasticNetParams<F> {
+        MultiTaskElasticNetParams::new()
     }
 
     /// Create a multi-task ridge only model
-    pub fn ridge() -> ElasticNetParams<F> {
-        ElasticNetParams::new().l1_ratio(F::zero())
+    pub fn ridge() -> MultiTaskElasticNetParams<F> {
+        MultiTaskElasticNetParams::new().l1_ratio(F::zero())
     }
 
     /// Create a multi-task Lasso only model
-    pub fn lasso() -> ElasticNetParams<F> {
-        ElasticNetParams::new().l1_ratio(F::one())
+    pub fn lasso() -> MultiTaskElasticNetParams<F> {
+        MultiTaskElasticNetParams::new().l1_ratio(F::one())
     }
 }

From d81a5cdac265673314cfb225cb27c928ed84261e Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sun, 23 Jan 2022 21:47:15 +0100
Subject: [PATCH 18/28] cln tests

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 84eadbbdd..b7c1c4470 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -699,10 +699,7 @@ mod tests {
 
     #[test]
     fn lasso_zero_works() {
-        // let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.]));
-        let x = array![[0.], [0.], [0.]];
-        let y = array![0., 0., 0.];
-        let dataset = Dataset::from((x, y));
+        let dataset = Dataset::from((array![[0.], [0.], [0.]], array![0., 0., 0.]));
 
         let model = ElasticNet::params()
             .l1_ratio(1.0)

From a21bc1a2c9c74edc410036783bee7726842c1ecc Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sat, 19 Feb 2022 19:45:42 +0100
Subject: [PATCH 19/28] changed map into fold

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 38 ++++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index b7c1c4470..c09586a82 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -108,6 +108,7 @@ where
 
         // let y_est = dataset.records().dot(&hyperplane) + &intercept;
         // let variance = variance_params_multi_task(dataset, y_est);
+        // TODO: Compute variance
         let variance = Ok(Array1::<F>::zeros(dataset.nfeatures()));
 
         Ok(MultiTaskElasticNet {
@@ -302,18 +303,13 @@ fn coordinate_descent<'a, F: Float>(
             let old_w_j = w[j];
             let x_j: ArrayView1<F> = x.slice(s![.., j]);
             if abs_diff_ne!(old_w_j, F::zero()) {
-                // for i in 0..x.shape()[0] {
-                //     r[i] += x_j[i] * old_w_j;
-                // }
                 r.scaled_add(old_w_j, &x_j);
             }
             let tmp: F = x_j.dot(&r);
             w[j] = tmp.signum() * F::max(tmp.abs() - n_samples * l1_ratio * penalty, F::zero())
                 / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty);
             if abs_diff_ne!(w[j], F::zero()) {
-                for i in 0..x.shape()[0] {
-                    r[i] -= x_j[i] * w[j];
-                }
+                r.scaled_add(-w[j], &x_j);
             }
             let d_w_j = (w[j] - old_w_j).abs();
             d_w_max = F::max(d_w_max, d_w_j);
@@ -353,7 +349,7 @@ fn block_coordinate_descent<'a, F: Float>(
     let norm_cols_x = x.map_axis(Axis(0), |col| col.dot(&col));
     let mut gap = F::one() + tol;
     let d_w_tol = tol;
-    let tol = tol * y.map(|yij| yij.powi(2)).sum();
+    let tol = tol * y.fold(F::zero(), |sum, &y_ij| sum + y_ij.powi(2));
     while n_steps < max_steps {
         let mut w_max = F::zero();
         let mut d_w_max = F::zero();
@@ -363,7 +359,9 @@ fn block_coordinate_descent<'a, F: Float>(
             }
             let old_w_j: ArrayView1<F> = w.slice(s![j, ..]);
             let x_j: ArrayView1<F> = x.slice(s![.., j]);
-            let norm_old_w_j = old_w_j.map(|wjt| wjt.powi(2)).sum().sqrt();
+            let norm_old_w_j = old_w_j
+                .fold(F::zero(), |sum, &wjt| sum + wjt.powi(2))
+                .sqrt();
             if abs_diff_ne!(norm_old_w_j, F::zero()) {
                 for i in 0..x.shape()[0] {
                     for t in 0..n_tasks {
@@ -376,7 +374,10 @@ fn block_coordinate_descent<'a, F: Float>(
                 &(block_soft_thresholding(tmp.view(), n_samples * l1_ratio * penalty)
                     / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty)),
             );
-            let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt();
+            let norm_w_j = w
+                .slice(s![j, ..])
+                .fold(F::zero(), |sum, &wjt| sum + wjt.powi(2))
+                .sqrt();
             if abs_diff_ne!(norm_w_j, F::zero()) {
                 for i in 0..x.shape()[0] {
                     for t in 0..n_tasks {
@@ -404,13 +405,12 @@ fn block_coordinate_descent<'a, F: Float>(
 }
 
 fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> Array1<F> {
-    let norm_x = x.map(|&xi| xi.powi(2)).sum().sqrt();
-    let mut _res = Array1::<F>::zeros(x.len());
-    if norm_x >= threshold {
-        let scal = F::one() - threshold / norm_x;
-        _res = &x * scal;
+    let norm_x = x.fold(F::zero(), |sum, &x_i| sum + x_i * x_i).sqrt();
+    if norm_x < threshold {
+        return Array1::<F>::zeros(x.len());
     }
-    _res
+    let scale = F::one() - threshold / norm_x;
+    &x * scale
 }
 
 fn duality_gap<'a, F: Float>(
@@ -460,8 +460,8 @@ fn duality_gap_mtl<'a, F: Float>(
     let dual_norm_xta = xta
         .map_axis(Axis(1), |x| x.dot(&x).sqrt())
         .fold(F::zero(), |max_norm, &nrm| max_norm.max(nrm));
-    let r_norm2 = r.map(|rij| rij.powi(2)).sum();
-    let w_norm2 = w.map(|wij| wij.powi(2)).sum();
+    let r_norm2 = r.fold(F::zero(), |sum, &rij| sum + rij.powi(2));
+    let w_norm2 = w.fold(F::zero(), |sum, &wij| sum + wij.powi(2));
     let (const_, mut gap) = if dual_norm_xta > l1_reg {
         let const_ = l1_reg / dual_norm_xta;
         let a_norm2 = r_norm2 * const_ * const_;
@@ -576,7 +576,7 @@ mod tests {
         let mut resid = x.dot(beta);
         resid = &resid * -1.;
         resid = &resid - intercept + y;
-        let mut datafit = resid.map(|r| r.powi(2)).sum();
+        let mut datafit = resid.fold(0., |sum, &r| sum + r.powi(2));
         datafit /= 2.0 * x.shape()[0] as f64;
         datafit
     }
@@ -590,7 +590,7 @@ mod tests {
     }
 
     fn elastic_net_mtl_penalty(beta: &Array2<f64>, alpha: f64) -> f64 {
-        let frob_norm = beta.map(|beta_ij| beta_ij.powi(2)).sum();
+        let frob_norm = beta.fold(0., |sum, &beta_ij| sum + beta_ij.powi(2));
         let l21_norm = beta
             .map_axis(Axis(1), |beta_j| (beta_j.dot(&beta_j)).sqrt())
             .sum();

From 7f32afc59fae705dd9979ec9dc76997af7e4534f Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sat, 19 Feb 2022 20:54:14 +0100
Subject: [PATCH 20/28] added tests for Enet and MTL

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 114 ++++++++++++++++++-
 1 file changed, 110 insertions(+), 4 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index c09586a82..97d8f476e 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -769,6 +769,30 @@ mod tests {
             array![[-1.0, 1.0], [0.0, -1.5], [1.0, 1.3]],
         );
 
+        // no intercept fitting
+        let t = array![[2.0], [3.0], [4.0]];
+        let model = MultiTaskElasticNet::lasso()
+            .with_intercept(false)
+            .penalty(0.01)
+            .fit(&dataset)
+            .unwrap();
+        assert_abs_diff_eq!(model.intercept(), &array![0., 0.]);
+        assert_abs_diff_eq!(
+            model.hyperplane(),
+            &array![[0.9851659, 0.1477748]],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(
+            model.predict(&t),
+            array![
+                [1.9703319, 0.2955497],
+                [2.9554978, 0.4433246],
+                [3.9406638, 0.5910995]
+            ],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(model.duality_gap(), 0.0);
+
         // input for prediction
         let t = array![[2.0], [3.0], [4.0]];
         let model = MultiTaskElasticNet::lasso()
@@ -776,7 +800,7 @@ mod tests {
             .fit(&dataset)
             .unwrap();
         assert_abs_diff_eq!(model.intercept(), &array![0., -1.5]);
-        assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [2.65]], epsilon = 1e-6);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0., 2.65]], epsilon = 1e-6);
         assert_abs_diff_eq!(
             model.predict(&t),
             array![[0., 3.79999998], [0., 6.44999996], [0., 9.09999995]],
@@ -789,7 +813,7 @@ mod tests {
             .fit(&dataset)
             .unwrap();
         assert_abs_diff_eq!(model.intercept(), &array![0., -1.4]);
-        assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [2.5]], epsilon = 1e-6);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0., 2.5]], epsilon = 1e-6);
         assert_abs_diff_eq!(
             model.predict(&t),
             &array![[0., 3.6], [0., 6.1], [0., 8.6]],
@@ -802,7 +826,7 @@ mod tests {
             .fit(&dataset)
             .unwrap();
         assert_abs_diff_eq!(model.intercept(), &array![0., -1.]);
-        assert_abs_diff_eq!(model.hyperplane(), &array![[0.], [1.9]], epsilon = 1e-6);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0., 1.9]], epsilon = 1e-6);
         assert_abs_diff_eq!(
             model.predict(&t),
             &array![[0., 2.8], [0., 4.7], [0., 6.6]],
@@ -815,7 +839,7 @@ mod tests {
             .fit(&dataset)
             .unwrap();
         assert_abs_diff_eq!(model.intercept(), &array![0., -0.5]);
-        assert_abs_diff_eq!(model.hyperplane(), &array![[0.0], [1.15]], epsilon = 1e-6);
+        assert_abs_diff_eq!(model.hyperplane(), &array![[0.0, 1.15]], epsilon = 1e-6);
         assert_abs_diff_eq!(
             model.predict(&t),
             &array![[0., 1.8], [0., 2.95], [0., 4.1]],
@@ -861,6 +885,88 @@ mod tests {
         assert_abs_diff_eq!(model.duality_gap(), 0.0);
     }
 
+    #[test]
+    fn multitask_elasticnet_toy_example_works() {
+        // Test MultiTaskElasticNet on a toy example for various values of alpha
+        // and l1_ratio. When validating this against sklearn notice that sklearn
+        // divides it against n_samples.
+        let dataset = Dataset::new(
+            array![[-1.0], [0.0], [1.0]],
+            array![[-1.0, 1.0], [0.0, -1.5], [1.0, 1.3]],
+        );
+
+        // no intercept fitting
+        let t = array![[2.0], [3.0], [4.0]];
+        let model = MultiTaskElasticNet::params()
+            .with_intercept(false)
+            .l1_ratio(0.3)
+            .penalty(0.1)
+            .fit(&dataset)
+            .unwrap();
+        assert_abs_diff_eq!(model.intercept(), &array![0., 0.]);
+        assert_abs_diff_eq!(
+            model.hyperplane(),
+            &array![[0.86470395, 0.12970559]],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(
+            model.predict(&t),
+            array![
+                [1.7294079, 0.25941118],
+                [2.59411185, 0.38911678],
+                [3.4588158, 0.51882237]
+            ],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(model.duality_gap(), 0.0, epsilon = 1e-12);
+
+        // input for prediction
+        let t = array![[2.0], [3.0], [4.0]];
+        let model = MultiTaskElasticNet::params()
+            .l1_ratio(0.3)
+            .penalty(0.1)
+            .fit(&dataset)
+            .unwrap();
+        assert_abs_diff_eq!(model.intercept(), &array![0., 0.26666666], epsilon = 1e-6);
+        assert_abs_diff_eq!(
+            model.hyperplane(),
+            &array![[0.86470395, 0.12970559]],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(
+            model.predict(&t),
+            array![
+                [1.7294079, 0.52607785],
+                [2.59411185, 0.65578344],
+                [3.4588158, 0.78548904]
+            ],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(model.duality_gap(), 0.0, epsilon = 1e-12);
+
+        let model = MultiTaskElasticNet::params()
+            .l1_ratio(0.5)
+            .penalty(0.1)
+            .fit(&dataset)
+            .unwrap();
+        assert_abs_diff_eq!(model.intercept(), &array![0., 0.2666666], epsilon = 1e-6);
+        assert_abs_diff_eq!(
+            model.hyperplane(),
+            &array![[0.861237, 0.12918555]],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(
+            model.predict(&t),
+            &array![
+                [1.722474, 0.52503777],
+                [2.583711, 0.65422332],
+                [3.44494799, 0.78340887]
+            ],
+            epsilon = 1e-6
+        );
+        assert_abs_diff_eq!(model.duality_gap(), 0.0, epsilon = 1e-12);
+    }
+
     #[test]
     fn elastic_net_2d_toy_example_works() {
         let dataset = Dataset::new(array![[1.0, 0.0], [0.0, 1.0]], array![3.0, 2.0]);

From 6c56e06764309237ae3673fb9a6ff1a00b48462a Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sat, 19 Feb 2022 22:18:31 +0100
Subject: [PATCH 21/28] added incorrect target shape

---
 algorithms/linfa-elasticnet/src/error.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/algorithms/linfa-elasticnet/src/error.rs b/algorithms/linfa-elasticnet/src/error.rs
index 9b44f258c..30fbe566b 100644
--- a/algorithms/linfa-elasticnet/src/error.rs
+++ b/algorithms/linfa-elasticnet/src/error.rs
@@ -25,6 +25,8 @@ pub enum ElasticNetError {
     InvalidPenalty(f32),
     #[error("invalid tolerance {0}")]
     InvalidTolerance(f32),
+    #[error("the target can either be a vector (ndim=1) or a matrix (ndim=2)")]
+    IncorrectTargetShape,
     #[error(transparent)]
     BaseCrate(#[from] linfa::Error),
 }

From a07fb39a100b0437d188865d5af0fc0a3b8882b2 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sat, 19 Feb 2022 22:19:01 +0100
Subject: [PATCH 22/28] WIP: made variance params generic over the number of
 tasks

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 65 ++++++++------------
 1 file changed, 26 insertions(+), 39 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 97d8f476e..57ae31379 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -1,6 +1,6 @@
 use approx::{abs_diff_eq, abs_diff_ne};
 use ndarray::{
-    s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2,
+    s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Dimension, Ix1, Ix2,
 };
 use ndarray_linalg::{Inverse, Lapack};
 
@@ -106,10 +106,8 @@ where
             self.penalty(),
         );
 
-        // let y_est = dataset.records().dot(&hyperplane) + &intercept;
-        // let variance = variance_params_multi_task(dataset, y_est);
-        // TODO: Compute variance
-        let variance = Ok(Array1::<F>::zeros(dataset.nfeatures()));
+        let y_est = dataset.records().dot(&hyperplane) + &intercept;
+        let variance = variance_params(dataset, y_est);
 
         Ok(MultiTaskElasticNet {
             hyperplane,
@@ -242,34 +240,13 @@ impl<F: Float> MultiTaskElasticNet<F> {
     }
 
     /// Calculate the Z score
-    pub fn z_score(&self) -> Result<Array1<F>> {
-        self.variance
-            .as_ref()
-            .map(|variance| {
-                self.hyperplane
-                    .iter()
-                    .zip(variance.iter())
-                    .map(|(a, b)| *a / b.sqrt())
-                    .collect()
-            })
-            .map_err(|err| err.clone())
+    pub fn z_score(&self) -> Result<Array2<F>> {
+        // TODO
     }
 
     /// Calculate the confidence level
-    pub fn confidence_95th(&self) -> Result<Array1<(F, F)>> {
-        // the 95th percentile of our confidence level
-        let p = F::cast(1.645);
-
-        self.variance
-            .as_ref()
-            .map(|variance| {
-                self.hyperplane
-                    .iter()
-                    .zip(variance.iter())
-                    .map(|(a, b)| (*a - p * b.sqrt(), *a + p * b.sqrt()))
-                    .collect()
-            })
-            .map_err(|err| err.clone())
+    pub fn confidence_95th(&self) -> Result<Array2<(F, F)>> {
+        // TODO
     }
 }
 
@@ -364,9 +341,7 @@ fn block_coordinate_descent<'a, F: Float>(
                 .sqrt();
             if abs_diff_ne!(norm_old_w_j, F::zero()) {
                 for i in 0..x.shape()[0] {
-                    for t in 0..n_tasks {
-                        r[[i, t]] += x_j[i] * old_w_j[t];
-                    }
+                    r.slice_mut(s![i, ..]).scaled_add(x_j[i], &old_w_j);
                 }
             }
             let tmp = x_j.dot(&r);
@@ -477,22 +452,34 @@ fn duality_gap_mtl<'a, F: Float>(
     gap
 }
 
-fn variance_params<F: Float + Lapack, T: AsTargets<Elem = F>, D: Data<Elem = F>>(
-    ds: &DatasetBase<ArrayBase<D, Ix2>, T>,
-    y_est: Array1<F>,
+fn variance_params<F: Float + Lapack, T: AsTargets<Elem = F>, D: Data<Elem = F>, I: Dimension>(
+    ds: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, I>>,
+    y_est: ArrayBase<D, I>,
 ) -> Result<Array1<F>> {
     let nfeatures = ds.nfeatures();
     let nsamples = ds.nsamples();
 
-    // try to convert targets into a single target
-    let target = ds.try_single_target()?;
+    let target = ds.targets();
+    let ndim = target.ndim();
+
+    let ntasks: usize = match ndim {
+        1 => 1,
+        2 => *target.shape().last().unwrap(),
+        _ => 0,
+    };
+
+    // check that the dimension of the target is either a vector or a matrix
+    if ntasks == 0 {
+        return Err(ElasticNetError::IncorrectTargetShape);
+    }
 
     // check that we have enough samples
     if nsamples < nfeatures + 1 {
         return Err(ElasticNetError::NotEnoughSamples);
     }
 
-    let var_target = (&target - &y_est).mapv(|x| x * x).sum() / F::cast(nsamples - nfeatures);
+    let var_target =
+        (target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures));
 
     let inv_cov = ds.records().t().dot(ds.records()).inv();
 

From 75023d957846f8017218b13e14e792793d535a81 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sun, 20 Feb 2022 13:27:04 +0100
Subject: [PATCH 23/28] added z_score and confidence_95th for MTL

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 29 ++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 57ae31379..328c7768e 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -241,12 +241,37 @@ impl<F: Float> MultiTaskElasticNet<F> {
 
     /// Calculate the Z score
     pub fn z_score(&self) -> Result<Array2<F>> {
-        // TODO
+        self.variance
+            .as_ref()
+            .map(|variance| {
+                self.hyperplane
+                    .axis_iter(Axis(0))
+                    .zip(variance.iter())
+                    .map(|(a, b)| a.to_owned() / b.sqrt())
+                    .collect()
+            })
+            .map_err(|err| err.clone())
     }
 
     /// Calculate the confidence level
     pub fn confidence_95th(&self) -> Result<Array2<(F, F)>> {
-        // TODO
+        // the 95th percentile of our confidence level
+        let p = F::cast(1.645);
+
+        self.variance
+            .as_ref()
+            .map(|variance| {
+                self.hyperplane
+                    .axis_iter(Axis(1))
+                    .map(|coef| {
+                        coef.iter()
+                            .zip(variance.iter())
+                            .map(|(a, b)| (*a - p * b.sqrt(), *a + p * b.sqrt()))
+                            .collect()
+                    })
+                    .collect()
+            })
+            .map_err(|err| err.clone())
     }
 }
 

From ba3a574802bd79c18057784e22801e61dbc0c277 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sun, 20 Feb 2022 14:35:21 +0100
Subject: [PATCH 24/28] map instead of fold

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 328c7768e..9b4fc563b 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -351,7 +351,7 @@ fn block_coordinate_descent<'a, F: Float>(
     let norm_cols_x = x.map_axis(Axis(0), |col| col.dot(&col));
     let mut gap = F::one() + tol;
     let d_w_tol = tol;
-    let tol = tol * y.fold(F::zero(), |sum, &y_ij| sum + y_ij.powi(2));
+    let tol = tol * y.iter().map(|&y_ij| y_ij * y_ij).sum();
     while n_steps < max_steps {
         let mut w_max = F::zero();
         let mut d_w_max = F::zero();

From 114fc038ff829f6975fd615c25af26bd9e4702b4 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sun, 20 Feb 2022 21:12:00 +0100
Subject: [PATCH 25/28] fix confidence interval and z-score

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 9b4fc563b..dfd94ea30 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -244,11 +244,9 @@ impl<F: Float> MultiTaskElasticNet<F> {
         self.variance
             .as_ref()
             .map(|variance| {
-                self.hyperplane
-                    .axis_iter(Axis(0))
-                    .zip(variance.iter())
-                    .map(|(a, b)| a.to_owned() / b.sqrt())
-                    .collect()
+                ndarray::Zip::from(&self.hyperplane)
+                    .and_broadcast(variance)
+                    .map_collect(|a, b| *a / b.sqrt())
             })
             .map_err(|err| err.clone())
     }
@@ -261,15 +259,9 @@ impl<F: Float> MultiTaskElasticNet<F> {
         self.variance
             .as_ref()
             .map(|variance| {
-                self.hyperplane
-                    .axis_iter(Axis(1))
-                    .map(|coef| {
-                        coef.iter()
-                            .zip(variance.iter())
-                            .map(|(a, b)| (*a - p * b.sqrt(), *a + p * b.sqrt()))
-                            .collect()
-                    })
-                    .collect()
+                ndarray::Zip::from(&self.hyperplane)
+                    .and_broadcast(variance)
+                    .map_collect(|a, b| (*a - p * b.sqrt(), *a + p * b.sqrt()))
             })
             .map_err(|err| err.clone())
     }

From 30744c0a037d8c904e07e8ac3251a7c24ba2937b Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Mon, 21 Feb 2022 11:09:25 +0100
Subject: [PATCH 26/28] converted back fold to map

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 25 +++++++++-----------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index dfd94ea30..0961f831c 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -107,6 +107,8 @@ where
         );
 
         let y_est = dataset.records().dot(&hyperplane) + &intercept;
+
+        // try to calculate the variance
         let variance = variance_params(dataset, y_est);
 
         Ok(MultiTaskElasticNet {
@@ -353,9 +355,7 @@ fn block_coordinate_descent<'a, F: Float>(
             }
             let old_w_j: ArrayView1<F> = w.slice(s![j, ..]);
             let x_j: ArrayView1<F> = x.slice(s![.., j]);
-            let norm_old_w_j = old_w_j
-                .fold(F::zero(), |sum, &wjt| sum + wjt.powi(2))
-                .sqrt();
+            let norm_old_w_j = old_w_j.map(|wjt| wjt.powi(2)).sum().sqrt();
             if abs_diff_ne!(norm_old_w_j, F::zero()) {
                 for i in 0..x.shape()[0] {
                     r.slice_mut(s![i, ..]).scaled_add(x_j[i], &old_w_j);
@@ -366,10 +366,7 @@ fn block_coordinate_descent<'a, F: Float>(
                 &(block_soft_thresholding(tmp.view(), n_samples * l1_ratio * penalty)
                     / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty)),
             );
-            let norm_w_j = w
-                .slice(s![j, ..])
-                .fold(F::zero(), |sum, &wjt| sum + wjt.powi(2))
-                .sqrt();
+            let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt();
             if abs_diff_ne!(norm_w_j, F::zero()) {
                 for i in 0..x.shape()[0] {
                     for t in 0..n_tasks {
@@ -384,7 +381,7 @@ fn block_coordinate_descent<'a, F: Float>(
         n_steps += 1;
 
         if n_steps == max_steps - 1 || abs_diff_eq!(w_max, F::zero()) || d_w_max / w_max < d_w_tol {
-            // We've hit one potentiel stopping criteria
+            // We've hit one potential stopping criteria
             // check duality gap for ultimate stopping criterion
             gap = duality_gap_mtl(x.view(), y.view(), w.view(), r.view(), l1_ratio, penalty);
             if gap < tol {
@@ -397,7 +394,7 @@ fn block_coordinate_descent<'a, F: Float>(
 }
 
 fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> Array1<F> {
-    let norm_x = x.fold(F::zero(), |sum, &x_i| sum + x_i * x_i).sqrt();
+    let norm_x = x.map(|x| x.powi(2)).sum().sqrt();
     if norm_x < threshold {
         return Array1::<F>::zeros(x.len());
     }
@@ -429,7 +426,7 @@ fn duality_gap<'a, F: Float>(
     } else {
         (F::one(), r_norm2)
     };
-    let l1_norm = w.fold(F::zero(), |sum, w_i| sum + w_i.abs());
+    let l1_norm = w.map(|w_i| w_i.abs()).sum();
     gap += l1_reg * l1_norm - const_ * r.dot(&y)
         + half * l2_reg * (F::one() + const_ * const_) * w_norm2;
     gap
@@ -452,8 +449,8 @@ fn duality_gap_mtl<'a, F: Float>(
     let dual_norm_xta = xta
         .map_axis(Axis(1), |x| x.dot(&x).sqrt())
         .fold(F::zero(), |max_norm, &nrm| max_norm.max(nrm));
-    let r_norm2 = r.fold(F::zero(), |sum, &rij| sum + rij.powi(2));
-    let w_norm2 = w.fold(F::zero(), |sum, &wij| sum + wij.powi(2));
+    let r_norm2 = r.map(|rij| rij.powi(2)).sum();
+    let w_norm2 = w.map(|wij| wij.powi(2)).sum();
     let (const_, mut gap) = if dual_norm_xta > l1_reg {
         let const_ = l1_reg / dual_norm_xta;
         let a_norm2 = r_norm2 * const_ * const_;
@@ -580,7 +577,7 @@ mod tests {
         let mut resid = x.dot(beta);
         resid = &resid * -1.;
         resid = &resid - intercept + y;
-        let mut datafit = resid.fold(0., |sum, &r| sum + r.powi(2));
+        let mut datafit = resid.map(|rij| rij.powi(2)).sum();
         datafit /= 2.0 * x.shape()[0] as f64;
         datafit
     }
@@ -594,7 +591,7 @@ mod tests {
     }
 
     fn elastic_net_mtl_penalty(beta: &Array2<f64>, alpha: f64) -> f64 {
-        let frob_norm = beta.fold(0., |sum, &beta_ij| sum + beta_ij.powi(2));
+        let frob_norm = beta.map(|beta_ij| beta_ij.powi(2)).sum();
         let l21_norm = beta
             .map_axis(Axis(1), |beta_j| (beta_j.dot(&beta_j)).sqrt())
             .sum();

From 3b5f37dac97be59bc1e77cd5dec4bbb85767e443 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Thu, 3 Mar 2022 19:57:05 +0100
Subject: [PATCH 27/28] pass comments

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 0961f831c..2ae3887a4 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -355,7 +355,7 @@ fn block_coordinate_descent<'a, F: Float>(
             }
             let old_w_j: ArrayView1<F> = w.slice(s![j, ..]);
             let x_j: ArrayView1<F> = x.slice(s![.., j]);
-            let norm_old_w_j = old_w_j.map(|wjt| wjt.powi(2)).sum().sqrt();
+            let norm_old_w_j = old_w_j.dot(&old_w_j).sqrt();
             if abs_diff_ne!(norm_old_w_j, F::zero()) {
                 for i in 0..x.shape()[0] {
                     r.slice_mut(s![i, ..]).scaled_add(x_j[i], &old_w_j);
@@ -366,7 +366,7 @@ fn block_coordinate_descent<'a, F: Float>(
                 &(block_soft_thresholding(tmp.view(), n_samples * l1_ratio * penalty)
                     / (norm_cols_x[j] + n_samples * (F::one() - l1_ratio) * penalty)),
             );
-            let norm_w_j = w.slice(s![j, ..]).map(|wjt| wjt.powi(2)).sum().sqrt();
+            let norm_w_j = w.slice(s![j, ..]).dot(&w.slice(s![j, ..])).sqrt();
             if abs_diff_ne!(norm_w_j, F::zero()) {
                 for i in 0..x.shape()[0] {
                     for t in 0..n_tasks {
@@ -394,7 +394,7 @@ fn block_coordinate_descent<'a, F: Float>(
 }
 
 fn block_soft_thresholding<'a, F: Float>(x: ArrayView1<'a, F>, threshold: F) -> Array1<F> {
-    let norm_x = x.map(|x| x.powi(2)).sum().sqrt();
+    let norm_x = x.dot(&x).sqrt();
     if norm_x < threshold {
         return Array1::<F>::zeros(x.len());
     }
@@ -426,7 +426,7 @@ fn duality_gap<'a, F: Float>(
     } else {
         (F::one(), r_norm2)
     };
-    let l1_norm = w.map(|w_i| w_i.abs()).sum();
+    let l1_norm = w.iter().map(|w_i| w_i.abs()).sum();
     gap += l1_reg * l1_norm - const_ * r.dot(&y)
         + half * l2_reg * (F::one() + const_ * const_) * w_norm2;
     gap
@@ -449,8 +449,8 @@ fn duality_gap_mtl<'a, F: Float>(
     let dual_norm_xta = xta
         .map_axis(Axis(1), |x| x.dot(&x).sqrt())
         .fold(F::zero(), |max_norm, &nrm| max_norm.max(nrm));
-    let r_norm2 = r.map(|rij| rij.powi(2)).sum();
-    let w_norm2 = w.map(|wij| wij.powi(2)).sum();
+    let r_norm2 = r.iter().map(|rij| rij.powi(2)).sum();
+    let w_norm2 = w.iter().map(|wij| wij.powi(2)).sum();
     let (const_, mut gap) = if dual_norm_xta > l1_reg {
         let const_ = l1_reg / dual_norm_xta;
         let a_norm2 = r_norm2 * const_ * const_;
@@ -577,7 +577,7 @@ mod tests {
         let mut resid = x.dot(beta);
         resid = &resid * -1.;
         resid = &resid - intercept + y;
-        let mut datafit = resid.map(|rij| rij.powi(2)).sum();
+        let mut datafit = resid.iter().map(|rij| rij.powi(2)).sum();
         datafit /= 2.0 * x.shape()[0] as f64;
         datafit
     }
@@ -591,7 +591,7 @@ mod tests {
     }
 
     fn elastic_net_mtl_penalty(beta: &Array2<f64>, alpha: f64) -> f64 {
-        let frob_norm = beta.map(|beta_ij| beta_ij.powi(2)).sum();
+        let frob_norm: f64 = beta.iter().map(|beta_ij| beta_ij.powi(2)).sum();
         let l21_norm = beta
             .map_axis(Axis(1), |beta_j| (beta_j.dot(&beta_j)).sqrt())
             .sum();

From bc1ad06b60d21b5fb4646e0fc861d05ce9bdeef3 Mon Sep 17 00:00:00 2001
From: Pierre-Antoine Bannier <pierreantoine.bannier@gmail.com>
Date: Sun, 20 Mar 2022 14:22:08 +0100
Subject: [PATCH 28/28] WIP make compute_variance generic over the dimension

---
 algorithms/linfa-elasticnet/src/algorithm.rs | 44 ++++++++++----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs
index 5e1f615a5..ee759144c 100644
--- a/algorithms/linfa-elasticnet/src/algorithm.rs
+++ b/algorithms/linfa-elasticnet/src/algorithm.rs
@@ -1,10 +1,14 @@
 use approx::{abs_diff_eq, abs_diff_ne};
-use linfa::dataset::AsSingleTargets;
-use ndarray::{s, Array1, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Ix1, Ix2};
+use ndarray::{
+    s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, CowArray, Data, Dimension, Ix1, Ix2,
+};
 use ndarray_linalg::{Inverse, Lapack};
 
 use linfa::traits::{Fit, PredictInplace};
-use linfa::{dataset::Records, DatasetBase, Float};
+use linfa::{
+    dataset::{AsMultiTargets, AsSingleTargets, AsTargets, Records},
+    DatasetBase, Float,
+};
 
 use super::{
     hyperparams::{ElasticNetValidParams, MultiTaskElasticNetValidParams},
@@ -57,10 +61,10 @@ where
     }
 }
 
-impl<F, D> Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>, ElasticNetError>
-    for MultiTaskElasticNetValidParams<F>
+impl<F, D, T> Fit<ArrayBase<D, Ix2>, T, ElasticNetError> for MultiTaskElasticNetValidParams<F>
 where
     F: Float + Lapack,
+    T: AsMultiTargets<Elem = F>,
     D: Data<Elem = F>,
 {
     type Object = MultiTaskElasticNet<F>;
@@ -75,20 +79,17 @@ where
     /// Returns a `FittedMultiTaskElasticNet` object which contains the fitted
     /// parameters and can be used to `predict` values of the target variables
     /// for new feature values.
-    fn fit(
-        &self,
-        dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix2>>,
-    ) -> Result<Self::Object> {
-        let target = dataset.targets();
+    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object> {
+        let targets = dataset.targets().as_multi_targets();
         let nsamples = dataset.nsamples();
-        let ntasks = dataset.targets().ncols();
+        let ntasks = targets.ncols();
 
         let mut intercept = Array1::<F>::zeros(ntasks);
         let mut y = Array2::<F>::zeros((nsamples, ntasks));
 
         for t in 0..ntasks {
             let (intercept_t, y_t) =
-                compute_intercept(self.with_intercept(), target.slice(s![.., t]));
+                compute_intercept(self.with_intercept(), targets.slice(s![.., t]));
             intercept[t] = intercept_t;
             y.slice_mut(s![.., t]).assign(&y_t);
         }
@@ -462,26 +463,25 @@ fn duality_gap_mtl<'a, F: Float>(
     gap
 }
 
-fn variance_params<F: Float + Lapack, T: AsTargets<Elem = F>, D: Data<Elem = F>, I: Dimension>(
-    ds: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, I>>,
-    y_est: ArrayBase<D, I>,
+fn variance_params<F: Float + Lapack, T: AsTargets<Elem = F>, D: Data<Elem = F>>(
+    ds: &DatasetBase<ArrayBase<D, Ix2>, T>,
+    y_est: T,
 ) -> Result<Array1<F>> {
     let nfeatures = ds.nfeatures();
     let nsamples = ds.nsamples();
 
-    let target = ds.targets();
+    let target = ds.targets().as_targets();
     let ndim = target.ndim();
 
     let ntasks: usize = match ndim {
         1 => 1,
         2 => *target.shape().last().unwrap(),
-        _ => 0,
+        _ => {
+            return Err(ElasticNetError::IncorrectTargetShape);
+        }
     };
 
-    // check that the dimension of the target is either a vector or a matrix
-    if ntasks == 0 {
-        return Err(ElasticNetError::IncorrectTargetShape);
-    }
+    let y_est = y_est.as_targets();
 
     // check that we have enough samples
     if nsamples < nfeatures + 1 {
@@ -489,7 +489,7 @@ fn variance_params<F: Float + Lapack, T: AsTargets<Elem = F>, D: Data<Elem = F>,
     }
 
     let var_target =
-        (target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures));
+        (&target - &y_est).mapv(|x| x * x).sum() / F::cast(ntasks * (nsamples - nfeatures));
 
     let inv_cov = ds.records().t().dot(ds.records()).inv();