{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T17:59:19Z","timestamp":1764784759154,"version":"3.37.3"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"11-12","license":[{"start":{"date-parts":[[2021,11,2]],"date-time":"2021-11-02T00:00:00Z","timestamp":1635811200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,11,2]],"date-time":"2021-11-02T00:00:00Z","timestamp":1635811200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1007\/s10994-021-06085-5","type":"journal-article","created":{"date-parts":[[2021,11,2]],"date-time":"2021-11-02T17:51:20Z","timestamp":1635875480000},"page":"3177-3209","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["Sparse classification: a scalable discrete optimization perspective"],"prefix":"10.1007","volume":"110","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1985-1003","authenticated-orcid":false,"given":"Dimitris","family":"Bertsimas","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6352-0984","authenticated-orcid":false,"given":"Jean","family":"Pauphilet","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4177-4849","authenticated-orcid":false,"given":"Bart","family":"Van Parys","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,11,2]]},"reference":[{"key":"6085_CR1","unstructured":"Atamturk, A., & Gomez, A. (2019). Rank-one convexification for sparse regression. arXiv preprint arXiv:1901.10334"},{"key":"6085_CR2","unstructured":"Bach, F. (2009). High-dimensional non-linear variable selection through hierarchical kernel learning. arXiv preprint arXiv:0909.0844"},{"key":"6085_CR3","doi-asserted-by":"crossref","unstructured":"Bach, F., Jenatton, R., Mairal, J., & Obozinski, G. (2012). Optimization with sparsity-inducing penalties. Foundations and Trends\u00ae in Machine Learning, 4(1), 1\u2013106.","DOI":"10.1561\/2200000015"},{"issue":"2","key":"6085_CR4","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1137\/0320018","volume":"20","author":"DP Bertsekas","year":"1982","unstructured":"Bertsekas, D. P. (1982). Projected newton methods for optimization problems with simple constraints. SIAM Journal on Control and Optimization, 20(2), 221\u2013246.","journal-title":"SIAM Journal on Control and Optimization"},{"issue":"3","key":"6085_CR5","doi-asserted-by":"publisher","first-page":"931","DOI":"10.1016\/j.ejor.2017.03.051","volume":"270","author":"D Bertsimas","year":"2018","unstructured":"Bertsimas, D., & Copenhaver, M. S. (2018). Characterization of the equivalence of robustification and regularization in linear, median, and matrix regression. European Journal of Operations Research, 270(3), 931\u2013942.","journal-title":"European Journal of Operations Research"},{"key":"6085_CR6","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1214\/16-STS602","volume":"32","author":"D Bertsimas","year":"2017","unstructured":"Bertsimas, D., & King, A. (2017). Logistic regression: From art to science. Statistical Science, 32, 367\u2013384.","journal-title":"Statistical Science"},{"issue":"1","key":"6085_CR7","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1214\/18-AOS1804","volume":"48","author":"D Bertsimas","year":"2020","unstructured":"Bertsimas, D., & Van Parys, B. (2020). Sparse high-dimensional regression: Exact scalable algorithms and phase transitions. Annals of Statistics, 48(1), 300\u2013323.","journal-title":"Annals of Statistics"},{"issue":"2","key":"6085_CR8","doi-asserted-by":"publisher","first-page":"813","DOI":"10.1214\/15-AOS1388","volume":"44","author":"D Bertsimas","year":"2016","unstructured":"Bertsimas, D., King, A., & Mazumder, R. (2016). Best subset selection via a modern optimization lens. Annals of Statistics, 44(2), 813\u2013852.","journal-title":"Annals of Statistics"},{"issue":"4","key":"6085_CR9","first-page":"555","volume":"35","author":"D Bertsimas","year":"2020","unstructured":"Bertsimas, D., Pauphilet, J., & Van Parys, B. (2020). Sparse regression: Scalable algorithms and empirical performance. Statistical Science, 35(4), 555\u2013578.","journal-title":"Statistical Science"},{"issue":"2","key":"6085_CR10","doi-asserted-by":"publisher","first-page":"186","DOI":"10.1016\/j.disopt.2006.10.011","volume":"5","author":"P Bonami","year":"2008","unstructured":"Bonami, P., Biegler, L. T., Conn, A. R., Cornu\u00e9jols, G., Grossmann, I. E., Laird, C. D., et\u00a0al. (2008). An algorithmic framework for convex mixed integer nonlinear programs. Discrete Optimization, 5(2), 186\u2013204.","journal-title":"Discrete Optimization"},{"key":"6085_CR11","doi-asserted-by":"crossref","unstructured":"Boufounos, P. T., & Baraniuk R. G. (2008). 1-bit compressive sensing. In 42nd annual conference on information sciences and systems, 2008. CISS 2008. (pp. 16\u201321). IEEE.","DOI":"10.1109\/CISS.2008.4558487"},{"key":"6085_CR12","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441","volume-title":"Convex optimization","author":"S Boyd","year":"2004","unstructured":"Boyd, S., & Vandenberghe, L. (2004). Convex optimization. Cambridge University Press."},{"issue":"1","key":"6085_CR13","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1214\/10-AOAS388","volume":"5","author":"P Breheny","year":"2011","unstructured":"Breheny, P., & Huang, J. (2011). Coordinate descent algorithms for nonconvex penalized regression, with applications to biological feature selection. Annals of Applied Statistics, 5(1), 232.","journal-title":"Annals of Applied Statistics"},{"issue":"3","key":"6085_CR14","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1111\/j.1467-9868.2011.00771.x","volume":"73","author":"P B\u00fchlmann","year":"2011","unstructured":"B\u00fchlmann, P. (2011). Invited discussion on \u201cregression shrinkage and selection via the lasso: A retrospective\u201d (r. tibshirani). Journal of the Royal Statistical Society: Series B (Statistical Methodology), 73(3), 273\u2013282.","journal-title":"Journal of the Royal Statistical Society: Series B (Statistical Methodology)"},{"issue":"1","key":"6085_CR15","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/BF02592073","volume":"39","author":"PH Calamai","year":"1987","unstructured":"Calamai, P. H., & Mor\u00e9, J. J. (1987). Projected gradient methods for linearly constrained problems. Mathematical Programming, 39(1), 93\u2013116.","journal-title":"Mathematical Programming"},{"key":"6085_CR16","first-page":"1","volume":"20","author":"Y Chen","year":"2019","unstructured":"Chen, Y., Ye, Y., & Wang, M. (2019). Approximation hardness for a class of sparse optimization problems. Journal of Machine Learning Research, 20, 1\u201327.","journal-title":"Journal of Machine Learning Research"},{"key":"6085_CR17","doi-asserted-by":"crossref","unstructured":"Chu, B.-Y., Ho, C.-H., Tsai, C.-H., Lin, C.-Y., & Lin, C.-J. (2015). Warm start for parameter selection of linear classifiers. In Proceedings of the 21th ACM SIGKDD international conference on knowledge discovery and data mining (pp. 149\u2013158). ACM.","DOI":"10.1145\/2783258.2783332"},{"issue":"3","key":"6085_CR18","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/BF00994018","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes, C., & Vapnik, V. (1995). Support-vector networks. Machine Learning, 20(3), 273\u2013297.","journal-title":"Machine Learning"},{"key":"6085_CR20","volume-title":"Mathematical methods of statistics (PMS-9)","author":"H Cram\u00e9r","year":"2016","unstructured":"Cram\u00e9r, H. (2016). Mathematical methods of statistics (PMS-9) (Vol. 9). Princeton University Press."},{"issue":"1\u20134","key":"6085_CR21","doi-asserted-by":"publisher","first-page":"131","DOI":"10.3233\/IDA-1997-1302","volume":"1","author":"M Dash","year":"1997","unstructured":"Dash, M., & Liu, H. (1997). Feature selection for classification. Intelligent Data Analysis, 1(1\u20134), 131\u2013156.","journal-title":"Intelligent Data Analysis"},{"issue":"135","key":"6085_CR22","first-page":"1","volume":"22","author":"A Dedieu","year":"2021","unstructured":"Dedieu, A., Hazimeh, H., & Mazumder, R. (2021). Learning sparse classifiers: Continuous and mixed integer optimization perspectives. Journal of Machine Learning Research, 22(135), 1\u201347.","journal-title":"Journal of Machine Learning Research"},{"key":"6085_CR23","doi-asserted-by":"crossref","unstructured":"Donoho, D., & Stodden, V. (2006). Breakdown point of model selection when the number of variables exceeds the number of observations. In International joint conference on neural networks (pp. 1916\u20131921). IEEE.","DOI":"10.1109\/IJCNN.2006.246934"},{"issue":"1906","key":"6085_CR24","first-page":"4273","volume":"367","author":"D Donoho","year":"2009","unstructured":"Donoho, D., & Tanner, J. (2009). Observed universality of phase transitions in high-dimensional geometry, with implications for modern data analysis and signal processing. Philosophical Transactions of the Royal Society of London A: Mathematical, Physical and Engineering Sciences, 367(1906), 4273\u20134293.","journal-title":"Philosophical Transactions of the Royal Society of London A: Mathematical, Physical and Engineering Sciences"},{"issue":"2","key":"6085_CR25","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1137\/15M1020575","volume":"59","author":"I Dunning","year":"2017","unstructured":"Dunning, I., Huchette, J., & Lubin, M. (2017). JuMP: A modeling language for mathematical optimization. SIAM Review, 59(2), 295\u2013320.","journal-title":"SIAM Review"},{"issue":"3","key":"6085_CR26","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1007\/BF02592064","volume":"36","author":"MA Duran","year":"1986","unstructured":"Duran, M. A., & Grossmann, I. E. (1986). An outer-approximation algorithm for a class of mixed-integer nonlinear programs. Mathematical Programming, 36(3), 307\u2013339.","journal-title":"Mathematical Programming"},{"issue":"456","key":"6085_CR27","doi-asserted-by":"publisher","first-page":"1348","DOI":"10.1198\/016214501753382273","volume":"96","author":"J Fan","year":"2001","unstructured":"Fan, J., & Li, R. (2001). Variable selection via nonconcave penalized likelihood and its oracle properties. Journal of the American Statistical Association, 96(456), 1348\u20131360.","journal-title":"Journal of the American Statistical Association"},{"issue":"6","key":"6085_CR28","doi-asserted-by":"publisher","first-page":"3567","DOI":"10.1214\/10-AOS798","volume":"38","author":"J Fan","year":"2010","unstructured":"Fan, J., & Song, R. (2010). Sure independence screening in generalized linear models with np-dimensionality. Annals of Statistics, 38(6), 3567\u20133604.","journal-title":"Annals of Statistics"},{"issue":"2","key":"6085_CR29","doi-asserted-by":"publisher","first-page":"814","DOI":"10.1214\/17-AOS1568","volume":"46","author":"J Fan","year":"2018","unstructured":"Fan, J., Liu, H., Sun, Q., & Zhang, T. (2018). I-lamm for sparse learning: Simultaneous control of algorithmic complexity and statistical error. Annals of Statistics, 46(2), 814.","journal-title":"Annals of Statistics"},{"issue":"7","key":"6085_CR30","doi-asserted-by":"publisher","first-page":"2146","DOI":"10.1287\/mnsc.2016.2461","volume":"63","author":"M Fischetti","year":"2017","unstructured":"Fischetti, M., Ljubi\u0107, I., & Sinnl, M. (2017). Redesigning benders decomposition for large-scale facility location. Management Science, 63(7), 2146\u20132162.","journal-title":"Management Science"},{"issue":"1\u20133","key":"6085_CR31","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1007\/BF01581153","volume":"66","author":"R Fletcher","year":"1994","unstructured":"Fletcher, R., & Leyffer, S. (1994). Solving mixed integer nonlinear programs by outer approximation. Mathematical Programming, 66(1\u20133), 327\u2013349.","journal-title":"Mathematical Programming"},{"issue":"2","key":"6085_CR32","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1080\/00401706.1993.10485033","volume":"35","author":"LL Frank","year":"1993","unstructured":"Frank, L. L., & Friedman, J. (1993). A statistical view of some chemometrics regression tools. Technometrics, 35(2), 109\u2013135.","journal-title":"Technometrics"},{"issue":"1","key":"6085_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v033.i01","volume":"33","author":"J Friedman","year":"2010","unstructured":"Friedman, J., Hastie, T., & Tibshirani, R. (2010). Regularization paths for generalized linear models via coordinate descent. Journal of Statistical Software, 33(1), 1.","journal-title":"Journal of Statistical Software"},{"key":"6085_CR34","unstructured":"Friedman, J., Hastie, T., Tibshirani, R., et\u00a0al. (2013). GLMNet: Lasso and elastic-net regularized generalized linear models. r package version 1.9-5."},{"key":"6085_CR35","unstructured":"Gamarnik, D., & Zadik, I. (2017). High-dimensional regression with binary coefficients. Estimating squared error and a phase transition. In Conference on Learning Theory (pp. 948\u2013953). PMLR."},{"key":"6085_CR36","doi-asserted-by":"crossref","unstructured":"Goldfarb, D. (1994). On the complexity of the simplex method. In Advances in optimization and numerical analysis (pp. 25\u201338). Springer.","DOI":"10.1007\/978-94-015-8330-5_2"},{"key":"6085_CR37","doi-asserted-by":"crossref","unstructured":"Gupta, A., Nowak, R., & Recht, B. (2010). Sample complexity for 1-bit compressed sensing and sparse classification. In 2010 IEEE international symposium on information theory proceedings (ISIT) (pp. 1553\u20131557). IEEE.","DOI":"10.1109\/ISIT.2010.5513510"},{"issue":"1\u20133","key":"6085_CR39","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1023\/A:1012487302797","volume":"46","author":"I Guyon","year":"2002","unstructured":"Guyon, I., Weston, J., Barnhill, S., & Vapnik, V. (2002). Gene selection for cancer classification using support vector machines. Machine Learning, 46(1\u20133), 389\u2013422.","journal-title":"Machine Learning"},{"issue":"4","key":"6085_CR40","first-page":"579","volume":"35","author":"T Hastie","year":"2020","unstructured":"Hastie, T., Tibshirani, R., & Tibshirani, R. (2020). Best subset, forward stepwise or lasso? Analysis and recommendations based on extensive comparisons. Statistical Science, 35(4), 579\u2013592.","journal-title":"Statistical Science"},{"issue":"5","key":"6085_CR41","doi-asserted-by":"publisher","first-page":"1517","DOI":"10.1287\/opre.2019.1919","volume":"68","author":"H Hazimeh","year":"2020","unstructured":"Hazimeh, H., & Mazumder, R. (2020). Fast best subset selection: Coordinate descent and local combinatorial optimization algorithms. Operations Research, 68(5), 1517\u20131537.","journal-title":"Operations Research"},{"key":"6085_CR42","doi-asserted-by":"crossref","unstructured":"Hsieh, C.-J., Chang, K.-W., Lin, C.-J., Keerthi, S. S., & Sundararajan, S. (2008). A dual coordinate descent method for large-scale linear SVM. In Proceedings of the 25th international conference on machine learning (pp. 408\u2013415). ACM.","DOI":"10.1145\/1390156.1390208"},{"issue":"4","key":"6085_CR43","doi-asserted-by":"publisher","first-page":"1617","DOI":"10.1214\/009053605000000200","volume":"33","author":"D Hunter","year":"2005","unstructured":"Hunter, D., & Li, R. (2005). Variable selection using MM algorithms. Annals of Statistics, 33(4), 1617.","journal-title":"Annals of Statistics"},{"key":"6085_CR19","unstructured":"IBM ILOG CPLEX. (2011). Cplex users manual."},{"key":"6085_CR38","unstructured":"Inc. Gurobi Optimization. (2016). Gurobi optimizer reference manual. http:\/\/www.gurobi.com"},{"issue":"4","key":"6085_CR44","doi-asserted-by":"publisher","first-page":"2082","DOI":"10.1109\/TIT.2012.2234823","volume":"59","author":"L Jacques","year":"2013","unstructured":"Jacques, L., Laska, J. N., Boufounos, P. T., & Baraniuk, R. G. (2013). Robust 1-bit compressive sensing via binary stable embeddings of sparse vectors. IEEE Transactions on Information Theory, 59(4), 2082\u20132102.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"1\u20133","key":"6085_CR45","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/s10994-005-0768-5","volume":"61","author":"SS Keerthi","year":"2005","unstructured":"Keerthi, S. S., Duan, K. B., Shevade, S. K., & Poo, A. N. (2005). A fast dual algorithm for kernel logistic regression. Machine Learning, 61(1\u20133), 151\u2013165.","journal-title":"Machine Learning"},{"issue":"4","key":"6085_CR46","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1137\/0108053","volume":"8","author":"JE Kelley Jr","year":"1960","unstructured":"Kelley, J. E., Jr. (1960). The cutting-plane method for solving convex programs. Journal of the Society for Industrial and Applied Mathematics, 8(4), 703\u2013712.","journal-title":"Journal of the Society for Industrial and Applied Mathematics"},{"key":"6085_CR47","unstructured":"Kenney, A., Chiaromonte, F., & Felici, G. (2018). Efficient and effective$$ l\\_0 $$feature selection. arXiv preprint arXiv:1808.02526"},{"issue":"Apr","key":"6085_CR48","first-page":"627","volume":"9","author":"C-J Lin","year":"2008","unstructured":"Lin, C.-J., Weng, R. C., & Keerthi, S. S. (2008). Trust region newton method for logistic regression. Journal of Machine Learning Research, 9(Apr), 627\u2013650.","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"6085_CR49","first-page":"559","volume":"16","author":"P-L Loh","year":"2015","unstructured":"Loh, P.-L., & Wainwright, M. (2015). Regularized m-estimators with nonconvexity: Statistical and algorithmic theory for local optima. The Journal of Machine Learning Research, 16(1), 559\u2013616.","journal-title":"The Journal of Machine Learning Research"},{"issue":"6","key":"6085_CR50","doi-asserted-by":"publisher","first-page":"2455","DOI":"10.1214\/16-AOS1530","volume":"45","author":"P-L Loh","year":"2017","unstructured":"Loh, P.-L., Wainwright, M. J., et\u00a0al. (2017). Support recovery without incoherence: A case for nonconvex regularization. Annals of Statistics, 45(6), 2455\u20132482.","journal-title":"Annals of Statistics"},{"issue":"2","key":"6085_CR51","doi-asserted-by":"publisher","first-page":"238","DOI":"10.1287\/ijoc.2014.0623","volume":"27","author":"M Lubin","year":"2015","unstructured":"Lubin, M., & Dunning, I. (2015). Computing in operations research using Julia. INFORMS Journal on Computing, 27(2), 238\u2013248.","journal-title":"INFORMS Journal on Computing"},{"issue":"495","key":"6085_CR52","doi-asserted-by":"publisher","first-page":"1125","DOI":"10.1198\/jasa.2011.tm09738","volume":"106","author":"R Mazumder","year":"2011","unstructured":"Mazumder, R., Friedman, J., & Hastie, T. (2011). Sparsenet: Coordinate descent with nonconvex penalties. Journal of the American Statistical Association, 106(495), 1125\u20131138.","journal-title":"Journal of the American Statistical Association"},{"issue":"2","key":"6085_CR53","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1137\/S0097539792240406","volume":"24","author":"BK Natarajan","year":"1995","unstructured":"Natarajan, B. K. (1995). Sparse approximate solutions to linear systems. SIAM Journal on Computing, 24(2), 227\u2013234.","journal-title":"SIAM Journal on Computing"},{"issue":"1","key":"6085_CR54","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1007\/s10107-015-0894-1","volume":"151","author":"M Pilanci","year":"2015","unstructured":"Pilanci, M., Wainwright, M. J., & El Ghaoui, L. (2015). Sparse learning via Boolean relaxations. Mathematical Programming, 151(1), 63\u201387.","journal-title":"Mathematical Programming"},{"issue":"8","key":"6085_CR55","doi-asserted-by":"publisher","first-page":"1275","DOI":"10.1002\/cpa.21442","volume":"66","author":"Y Plan","year":"2013","unstructured":"Plan, Y., & Vershynin, R. (2013a). One-bit compressed sensing by linear programming. Communications on Pure and Applied Mathematics, 66(8), 1275\u20131297.","journal-title":"Communications on Pure and Applied Mathematics"},{"issue":"1","key":"6085_CR56","doi-asserted-by":"publisher","first-page":"482","DOI":"10.1109\/TIT.2012.2207945","volume":"59","author":"Y Plan","year":"2013","unstructured":"Plan, Y., & Vershynin, R. (2013b). Robust 1-bit compressed sensing and sparse logistic regression: A convex programming approach. IEEE Transactions on Information Theory, 59(1), 482\u2013494.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"1","key":"6085_CR57","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1109\/TIT.2016.2606605","volume":"63","author":"J Scarlett","year":"2017","unstructured":"Scarlett, J., & Cevher, V. (2017). Limits on support recovery with probabilistic models: An information-theoretic framework. IEEE Transactions on Information Theory, 63(1), 593\u2013620.","journal-title":"IEEE Transactions on Information Theory"},{"key":"6085_CR58","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4175.001.0001","volume-title":"Learning with kernels: Support vector machines, regularization, optimization, and beyond","author":"B Sch\u00f6lkopf","year":"2001","unstructured":"Sch\u00f6lkopf, B., Smola, A. J., Bach, F., et\u00a0al. (2001). Learning with kernels: Support vector machines, regularization, optimization, and beyond. MIT Press."},{"issue":"1","key":"6085_CR59","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s10107-010-0420-4","volume":"127","author":"S Shalev-Shwartz","year":"2011","unstructured":"Shalev-Shwartz, S., Singer, Y., Srebro, N., & Cotter, A. (2011). Pegasos: Primal estimated sub-gradient solver for SVM. Mathematical Programming, 127(1), 3\u201330.","journal-title":"Mathematical Programming"},{"issue":"3","key":"6085_CR60","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1006\/jcom.2002.0642","volume":"18","author":"I Steinwart","year":"2002","unstructured":"Steinwart, I. (2002). Support vector machines are universally consistent. Journal of Complexity, 18(3), 768\u2013791.","journal-title":"Journal of Complexity"},{"key":"6085_CR61","doi-asserted-by":"publisher","first-page":"2133","DOI":"10.1214\/16-AOS1521","volume":"45","author":"W Su","year":"2017","unstructured":"Su, W., Bogdan, M., & Candes, E. (2017). False discoveries occur early on the lasso path. Annals of Statistics, 45, 2133\u20132150.","journal-title":"Annals of Statistics"},{"key":"6085_CR62","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani, R. (1996). Regression shrinkage and selection via the lasso. Journal of the Royal Statistical Society, Series B (Statistical Methodology), 58, 267\u2013288.","journal-title":"Journal of the Royal Statistical Society, Series B (Statistical Methodology)"},{"key":"6085_CR63","doi-asserted-by":"crossref","unstructured":"Vapnik, V. (1998). The support vector method of function estimation. In Nonlinear modeling (pp. 55\u201385). Springer.","DOI":"10.1007\/978-1-4615-5703-6_3"},{"issue":"12","key":"6085_CR64","doi-asserted-by":"publisher","first-page":"5728","DOI":"10.1109\/TIT.2009.2032816","volume":"55","author":"MJ Wainwright","year":"2009","unstructured":"Wainwright, M. J. (2009a). Information-theoretic limits on sparsity recovery in the high-dimensional and noisy setting. IEEE Transactions on Information Theory, 55(12), 5728\u20135741.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"5","key":"6085_CR65","doi-asserted-by":"publisher","first-page":"2183","DOI":"10.1109\/TIT.2009.2016018","volume":"55","author":"MJ Wainwright","year":"2009","unstructured":"Wainwright, M. J. (2009b). Sharp thresholds for high-dimensional and noisy sparsity recovery using-constrained quadratic programming (Lasso). IEEE Transactions on Information Theory, 55(5), 2183\u20132202.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"6","key":"6085_CR66","doi-asserted-by":"publisher","first-page":"2967","DOI":"10.1109\/TIT.2010.2046199","volume":"56","author":"W Wang","year":"2010","unstructured":"Wang, W., Wainwright, M. J., & Ramchandran, K. (2010). Information-theoretic limits on sparse signal recovery: Dense versus sparse measurement matrices. IEEE Transactions on Information Theory, 56(6), 2967\u20132979.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"1\u20132","key":"6085_CR67","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/s10994-010-5221-8","volume":"85","author":"H-F Yu","year":"2011","unstructured":"Yu, H.-F., Huang, F.-L., & Lin, C.-J. (2011). Dual coordinate descent methods for logistic regression and maximum entropy models. Machine Learning, 85(1\u20132), 41\u201375.","journal-title":"Machine Learning"},{"issue":"2","key":"6085_CR68","doi-asserted-by":"publisher","first-page":"894","DOI":"10.1214\/09-AOS729","volume":"38","author":"C-H Zhang","year":"2010","unstructured":"Zhang, C.-H. (2010a). Nearly unbiased variable selection under minimax concave penalty. Annals of Statistics, 38(2), 894\u2013942.","journal-title":"Annals of Statistics"},{"key":"6085_CR69","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1214\/aos\/1079120130","volume":"32","author":"T Zhang","year":"2004","unstructured":"Zhang, T. (2004). Statistical behavior and consistency of classification methods based on convex risk minimization. Annals of Statistics, 32, 56\u201385.","journal-title":"Annals of Statistics"},{"issue":"3","key":"6085_CR70","first-page":"1081","volume":"11","author":"T Zhang","year":"2010","unstructured":"Zhang, T. (2010b). Analysis of multi-stage convex relaxation for sparse regularization. Journal of Machine Learning Research, 11(3), 1081\u20131107.","journal-title":"Journal of Machine Learning Research"},{"issue":"5B","key":"6085_CR71","doi-asserted-by":"publisher","first-page":"2277","DOI":"10.3150\/12-BEJ452","volume":"19","author":"T Zhang","year":"2013","unstructured":"Zhang, T., et\u00a0al. (2013). Multi-stage convex relaxation for feature selection. Bernoulli, 19(5B), 2277\u20132293.","journal-title":"Bernoulli"},{"issue":"4","key":"6085_CR72","first-page":"1509","volume":"36","author":"H Zou","year":"2008","unstructured":"Zou, H., & Li, R. (2008). One-step sparse estimates in nonconcave penalized likelihood models. Annals of Statistics, 36(4), 1509.","journal-title":"Annals of Statistics"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-021-06085-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-021-06085-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-021-06085-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T09:19:12Z","timestamp":1726046352000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-021-06085-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,11,2]]},"references-count":72,"journal-issue":{"issue":"11-12","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["6085"],"URL":"https:\/\/doi.org\/10.1007\/s10994-021-06085-5","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2021,11,2]]},"assertion":[{"value":"29 October 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 September 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 September 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 November 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}