Skip to content

Commit

Permalink
Merge pull request #113 from lbluque/paper
Browse files Browse the repository at this point in the history
paper edits
  • Loading branch information
lbluque authored Oct 24, 2023
2 parents 1a9faa5 + 8cd5a01 commit 89bcdbe
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 33 deletions.
116 changes: 104 additions & 12 deletions paper/paper.bib
Original file line number Diff line number Diff line change
Expand Up @@ -230,18 +230,63 @@ @article{Zhu:2022
url = {http://jmlr.org/papers/v23/21-1060.html}
}

@article{Barroso-Luque:2022,
title = {Cluster Expansions of Multicomponent Ionic Materials: {{Formalism}} and Methodology},
shorttitle = {Cluster Expansions of Multicomponent Ionic Materials},
author = {Barroso-Luque, Luis and Zhong, Peichen and Yang, Julia H. and Xie, Fengyu and Chen, Tina and Ouyang, Bin and Ceder, Gerbrand},
date = {2022-10-12},
journaltitle = {Physical Review B},
shortjournal = {Phys. Rev. B},
volume = {106},
number = {14},
pages = {144202},
publisher = {{American Physical Society}},
doi = {10.1103/PhysRevB.106.144202},
@article{Athey:2017,
  title        = {The {{State}} of {{Applied Econometrics}}: {{Causality}} and {{Policy Evaluation}}},
  author       = {Athey, Susan and Imbens, Guido W.},
  year         = {2017},
  journaltitle = {Journal of Economic Perspectives},
  volume       = {31},
  number       = {2},
  pages        = {3--32},
  issn         = {0895-3309},
  doi          = {10.1257/jep.31.2.3},
}

@inproceedings{Chen:2021,
  title     = {Gene {{Selection}} from {{Biological Data}} via {{Group Lasso}} for {{Logistic Regression Model}}: {{Effects}} of {{Different Clustering Algorithms}}},
  author    = {Chen, Shunjie and Wang, Pei},
  year      = {2021},
  booktitle = {Proceedings of the 40th {{Chinese Control Conference}} ({{CCC}})},
  pages     = {6374--6379},
  issn      = {1934-1768},
  doi       = {10.23919/CCC52363.2021.9549471},
}

@article{Kim:2012,
  title        = {Analysis of {{Survival Data}} with {{Group Lasso}}},
  author       = {Kim, Jinseog and Sohn, Insuk and Jung, Sin-Ho and Kim, Sujong and Park, Changyi},
  year         = {2012},
  journaltitle = {Communications in Statistics - Simulation and Computation},
  volume       = {41},
  number       = {9},
  pages        = {1593--1605},
  publisher    = {{Taylor \& Francis}},
  issn         = {0361-0918},
  doi          = {10.1080/03610918.2011.611311},
}

@article{Gu:2018,
  title        = {Thermochemistry of Gas-Phase and Surface Species via {{LASSO-assisted}} Subgraph Selection},
  author       = {Gu, Geun Ho and Plechac, Petr and Vlachos, Dionisios G.},
  year         = {2018},
  journaltitle = {Reaction Chemistry \& Engineering},
  volume       = {3},
  number       = {4},
  pages        = {454--466},
  publisher    = {{The Royal Society of Chemistry}},
  issn         = {2058-9883},
  doi          = {10.1039/C7RE00210F},
}

@article{Ma:2007,
  title        = {Supervised Group {{Lasso}} with Applications to Microarray Data Analysis},
  author       = {Ma, Shuangge and Song, Xiao and Huang, Jian},
  year         = {2007},
  journaltitle = {BMC Bioinformatics},
  volume       = {8},
  number       = {1},
  pages        = {60},
  issn         = {1471-2105},
  doi          = {10.1186/1471-2105-8-60},
}

@article{Leong:2019,
Expand All @@ -256,3 +301,50 @@ @article{Leong:2019
doi = {10.1103/PhysRevB.100.134108},
urldate = {2020-04-29}
}

@article{Xie:2023,
  title        = {Semigrand-Canonical {{Monte-Carlo}} Simulation Methods for Charge-Decorated Cluster Expansions},
  author       = {Xie, Fengyu and Zhong, Peichen and Barroso-Luque, Luis and Ouyang, Bin and Ceder, Gerbrand},
  year         = {2023},
  journaltitle = {Computational Materials Science},
  volume       = {218},
  pages        = {112000},
  issn         = {0927-0256},
  doi          = {10.1016/j.commatsci.2022.112000},
}

@article{Zhong:2022,
  title        = {An {$\ell_0\ell_2$}-Norm Regularized Regression Model for Construction of Robust Cluster Expansions in Multicomponent Systems},
  author       = {Zhong, Peichen and Chen, Tina and Barroso-Luque, Luis and Xie, Fengyu and Ceder, Gerbrand},
  year         = {2022},
  journaltitle = {Physical Review B},
  volume       = {106},
  number       = {2},
  pages        = {024203},
  publisher    = {{American Physical Society}},
  doi          = {10.1103/PhysRevB.106.024203},
}

@article{Zhong:2023,
  title        = {Modeling {{Intercalation Chemistry}} with {{Multiredox Reactions}} by {{Sparse Lattice Models}} in {{Disordered Rocksalt Cathodes}}},
  author       = {Zhong, Peichen and Xie, Fengyu and Barroso-Luque, Luis and Huang, Liliang and Ceder, Gerbrand},
  year         = {2023},
  journaltitle = {PRX Energy},
  volume       = {2},
  number       = {4},
  pages        = {043005},
  publisher    = {{American Physical Society}},
  doi          = {10.1103/PRXEnergy.2.043005},
}

@article{Barroso-Luque:2022,
  title        = {Cluster Expansions of Multicomponent Ionic Materials: {{Formalism}} and Methodology},
  author       = {Barroso-Luque, Luis and Zhong, Peichen and Yang, Julia H. and Xie, Fengyu and Chen, Tina and Ouyang, Bin and Ceder, Gerbrand},
  year         = {2022},
  journaltitle = {Physical Review B},
  volume       = {106},
  number       = {14},
  pages        = {144202},
  publisher    = {{American Physical Society}},
  doi          = {10.1103/PhysRevB.106.144202},
}
55 changes: 34 additions & 21 deletions paper/paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,13 @@ resulting in sparse linear models such as the Lasso [@Tibshirani:1996; @Zou:2006
Best Subset Selection [@Hocking:1967] have been widely used in a variety of fields.
However, many regression problems involve covariates that have a natural underlying
structure, such as group or hierarchical relationships between covariates, that can be
leveraged to obtain improved model performance and interpretability. A common example of
linear regression problems with sparsity structure occurs in chemistry and materials
science when fitting multi-body expansions that involve a hierarchy among the main
effects from chemical composition and higher order corrections
aiming to capture the effects of chemical interactions [@Leong:2019; @Barroso-Luque:2022].
Several generalizations of the Lasso [@Yuan:2006; @Friedman:2010; @Simon:2013; @Wang:2019]
and Best Subset Selection [@Bertsimas:2016-a; @Bertsimas:2016-b] have been developed to
effectively exploit additional structure in linear regression.
leveraged to obtain improved model performance and interpretability. Such problems occur
in a wide range of fields including genomics [@Chen:2021], bioinformatics [@Ma:2007],
medicine [@Kim:2012], econometrics [@Athey:2017], chemistry [@Gu:2018], and materials
science [@Leong:2019]. Several generalizations of the Lasso
[@Yuan:2006; @Friedman:2010; @Simon:2013; @Wang:2019] and Best Subset Selection
[@Bertsimas:2016-a; @Bertsimas:2016-b] have been developed to effectively exploit
additional structure in linear regression.

# Statement of need

Expand Down Expand Up @@ -69,7 +68,7 @@ pseudo-norm regularization.
The pre-existing packages mentioned include highly performant implementations of the
specific models they implement. However, none of these packages implement the full range
of sparse linear models available in `sparse-lm`, nor do they support the flexibility
to modify the optimization objective and choose among many open-source and commerically
to modify the optimization objective and choose among many open-source and commercially
available solvers. `sparse-lm` satisfies the need for a flexible and comprehensive
library that enables easy experimentation and comparisons of different sparse
linear regression algorithms within a single package.
Expand Down Expand Up @@ -138,6 +137,18 @@ introduce hierarchical structure into the model. Finally, we have also included
$\ell_2$ regularization term controlled by the hyperparameter $\lambda$, which is useful
when dealing with poorly conditioned design matrices.

Statistical regression models with structured sparsity (involving grouped covariates,
sparse grouped covariates, and hierarchical relationships between covariate terms)
parametrized via Group Lasso or Best Subset Selection based objectives have been used in a
wide range of scientific disciplines, including genomics [@Chen:2021], bioinformatics [@Ma:2007],
medicine [@Kim:2012], econometrics [@Athey:2017], chemistry [@Gu:2018], and materials science
[@Leong:2019]. The flexible implementation of sparse linear regression models in `sparse-lm`
allows researchers to easily experiment and choose the best regression model for their
specific problem. `sparse-lm` has already been used to build linear models with
structured sparsity in a handful of materials science studies
[@Barroso-Luque:2022; @Zhong:2022; @Xie:2023; @Zhong:2023].


# Usage

Since the linear regression models in `sparse-lm` are implemented to be compatible with
Expand All @@ -153,18 +164,20 @@ options are implemented. The implemented models are listed below:
The table below shows the regression models that are implemented in `sparse-lm` as well
as available implementations in other Python packages. $\checkmark$ indicates that the

| Model | `sparse-lm` | `celer` | `groupyr` | `group-lasso` | `skglm` | `abess` |
|:-----------------------------:|:---------------:|:---------:|:-----------:|:------------------:|:----------:|:--------:|
| (Adaptive) Lasso | $\checkmark$️ | $\checkmark$️ | | | $\checkmark$️ ||
| (Adaptive) Group Lasso | $\checkmark$️ | $\checkmark$️ | $\checkmark$️ | $\checkmark$️ | $\checkmark$ ||
| (Adaptive) Sparse Group Lasso | $\checkmark$️ | | $\checkmark$️ | $\checkmark$️ | $\checkmark$ ||
| (Adaptive) Ridged Group Lasso | $\checkmark$️ | | | | $\checkmark$ | |
| Best Subset Selection | $\checkmark$️ | | | | ||
| Ridged Best Subset Selection | $\checkmark$️ | | | | ||
| $\ell_0$ pseudo-norm | $\checkmark$️ | | | | ||
| $\ell_0\ell_2$ mixed-norm | $\checkmark$️ | | | | | |

Note that only `sparse-lm` includes adaptive versions of Lasso estimators. However, some of the third party packages,
| Model | `sparse-lm` | `celer` | `groupyr` | `group-lasso` | `skglm` | `abess` |
|:-----------------------------:|:------------:|:---------:|:-----------:|:-----------:|:------------:|:--------:|
| (Adaptive) Lasso | $\checkmark$️ | $\checkmark$️ | | | $\checkmark$️ ||
| (Adaptive) Group Lasso | $\checkmark$️ | $\checkmark$️ | $\checkmark$️ | $\checkmark$️ | $\checkmark$ ||
| (Adaptive) Sparse Group Lasso | $\checkmark$️ | | $\checkmark$️ | $\checkmark$️ | $\checkmark$ ||
| (Adaptive) Ridged Group Lasso | $\checkmark$️ | | | | $\checkmark$ | |
| Best Subset Selection | $\checkmark$️ | | | | ||
| Ridged Best Subset Selection | $\checkmark$️ | | | | ||
| $\ell_0$ pseudo-norm | $\checkmark$️ | | | | ||
| $\ell_0\ell_2$ mixed-norm | $\checkmark$️ | | | | | |
| $\ell_{1/2}$ pseudo-norm      |              |           |             |             | $\checkmark$ | |
| $\ell_{2/3}$ pseudo-norm      |              |           |             |             | $\checkmark$ | |

Note that only `sparse-lm` includes adaptive versions of Lasso-based estimators. However, some of the third party packages,
notably `skglm` and `abess`, include additional penalties and regression objectives that are not implemented in `sparse-lm`.

## Implemented model selection and composition tools
Expand Down

0 comments on commit 89bcdbe

Please sign in to comment.