{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,22]],"date-time":"2026-03-22T17:36:29Z","timestamp":1774200989399,"version":"3.50.1"},"reference-count":88,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2022,1,22]],"date-time":"2022-01-22T00:00:00Z","timestamp":1642809600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,22]],"date-time":"2022-01-22T00:00:00Z","timestamp":1642809600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s10994-021-06123-2","type":"journal-article","created":{"date-parts":[[2022,1,22]],"date-time":"2022-01-22T00:04:11Z","timestamp":1642809851000},"page":"2161-2212","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["The backbone method for ultra-high dimensional sparse machine learning"],"prefix":"10.1007","volume":"111","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1985-1003","authenticated-orcid":false,"given":"Dimitris","family":"Bertsimas","sequence":"first","affiliation":[]},{"suffix":"Jr","given":"Vassilis","family":"Digalakis","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,22]]},"reference":[{"key":"6123_CR1","unstructured":"Aghaei S., Gomez A., & Vayanos P. (2020). Learning optimal classification trees: Strong max-flow formulations. 
arXiv preprint arXiv:2002.09142."},{"issue":"1\u20132","key":"6123_CR2","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/0004-3702(94)90084-1","volume":"69","author":"H Almuallim","year":"1994","unstructured":"Almuallim, H., & Dietterich, T. (1994). Learning Boolean concepts in the presence of many irrelevant features. Artificial Intelligence, 69(1\u20132), 279\u2013305.","journal-title":"Artificial Intelligence"},{"key":"6123_CR3","unstructured":"Atamturk A., & Gomez A. (2020). Safe screening rules for l0-regression from perspective relaxations. In: Daum\u00e9 III, H., & Singh, A. (eds) Proceedings of the 37th international conference on machine learning, PMLR, proceedings of machine learning research (Vol. 119, pp. 421\u2013430). http:\/\/proceedings.mlr.press\/v119\/atamturk20a.html."},{"issue":"3","key":"6123_CR4","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1287\/opre.46.3.316","volume":"46","author":"C Barnhart","year":"1998","unstructured":"Barnhart, C., Johnson, E. L., Nemhauser, G. L., Savelsbergh, M. W., & Vance, P. H. (1998). Branch-and-price: Column generation for solving huge integer programs. Operations Research, 46(3), 316\u2013329.","journal-title":"Operations Research"},{"issue":"3\u20134","key":"6123_CR5","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1093\/biomet\/54.3-4.357","volume":"54","author":"E Beale","year":"1967","unstructured":"Beale, E., Kendall, M., & Mann, D. (1967). The discarding of variables in multivariate analysis. Biometrika, 54(3\u20134), 357\u2013366.","journal-title":"Biometrika"},{"issue":"1","key":"6123_CR6","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1137\/080716542","volume":"2","author":"A Beck","year":"2009","unstructured":"Beck, A., & Teboulle, M. (2009). A fast iterative shrinkage-thresholding algorithm for linear inverse problems. 
SIAM Journal on Imaging Sciences, 2(1), 183\u2013202.","journal-title":"SIAM Journal on Imaging Sciences"},{"issue":"3","key":"6123_CR7","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1023\/A:1007600130808","volume":"41","author":"K Bennett","year":"2000","unstructured":"Bennett, K., Cristianini, N., Shawe-Taylor, J., & Wu, D. (2000). Enlarging the margins in perceptron decision trees. Machine Learning, 41(3), 295\u2013313.","journal-title":"Machine Learning"},{"issue":"2","key":"6123_CR8","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1016\/j.ejor.2015.09.051","volume":"250","author":"P Bertolazzi","year":"2016","unstructured":"Bertolazzi, P., Felici, G., Festa, P., Fiscon, G., & Weitschek, E. (2016). Integer programming models for feature selection: New extensions and a randomized solution algorithm. European Journal of Operational Research, 250(2), 389\u2013399.","journal-title":"European Journal of Operational Research"},{"issue":"3","key":"6123_CR9","doi-asserted-by":"publisher","first-page":"931","DOI":"10.1016\/j.ejor.2017.03.051","volume":"270","author":"D Bertsimas","year":"2018","unstructured":"Bertsimas, D., & Copenhaver, M. (2018). Characterization of the equivalence of robustification and regularization in linear and matrix regression. European Journal of Operational Research, 270(3), 931\u2013942.","journal-title":"European Journal of Operational Research"},{"issue":"7","key":"6123_CR10","doi-asserted-by":"publisher","first-page":"1039","DOI":"10.1007\/s10994-017-5633-9","volume":"106","author":"D Bertsimas","year":"2017","unstructured":"Bertsimas, D., & Dunn, J. (2017). Optimal classification trees. Machine Learning, 106(7), 1039\u20131082.","journal-title":"Machine Learning"},{"key":"6123_CR11","unstructured":"Bertsimas D., & Dunn J. (2019). Machine learning under a modern optimization lens. 
Dynamic Ideas LLC."},{"issue":"1","key":"6123_CR12","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1214\/18-AOS1804","volume":"48","author":"D Bertsimas","year":"2020","unstructured":"Bertsimas, D., & Van Parys, B. (2020). Sparse high-dimensional regression: Exact scalable algorithms and phase transitions. The Annals of Statistics, 48(1), 300\u2013323.","journal-title":"The Annals of Statistics"},{"issue":"2","key":"6123_CR13","doi-asserted-by":"publisher","first-page":"813","DOI":"10.1214\/15-AOS1388","volume":"44","author":"D Bertsimas","year":"2016","unstructured":"Bertsimas, D., King, A., & Mazumder, R. (2016). Best subset selection via a modern optimization lens. The Annals of Statistics, 44(2), 813\u2013852.","journal-title":"The Annals of Statistics"},{"issue":"1","key":"6123_CR14","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1287\/opre.2018.1763","volume":"67","author":"D Bertsimas","year":"2019","unstructured":"Bertsimas, D., Jaillet, P., & Martin, S. (2019). Online vehicle routing: The edge of optimization in large-scale applications. Operations Research, 67(1), 143\u2013162.","journal-title":"Operations Research"},{"issue":"4","key":"6123_CR15","first-page":"555","volume":"35","author":"D Bertsimas","year":"2020","unstructured":"Bertsimas, D., Pauphilet, J., Van Parys, B., et al. (2020). Sparse regression: Scalable algorithms and empirical performance. Statistical Science, 35(4), 555\u2013578.","journal-title":"Statistical Science"},{"key":"6123_CR16","unstructured":"Bertsimas, D., Digalakis, V., Jr., Li, M., & Skali Lami, O. (2021). Slowly varying regression under sparsity. arXiv preprint arXiv:2102.10773."},{"issue":"1","key":"6123_CR17","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1016\/j.ejor.2019.12.002","volume":"284","author":"R Blanquero","year":"2020","unstructured":"Blanquero, R., Carrizosa, E., Molero-R\u00edo, C., & Morales, D. R. (2020). Sparsity in optimal randomized classification trees. 
European Journal of Operational Research, 284(1), 255\u2013272.","journal-title":"European Journal of Operational Research"},{"key":"6123_CR18","doi-asserted-by":"publisher","first-page":"105281","DOI":"10.1016\/j.cor.2021.105281","volume":"132","author":"R Blanquero","year":"2021","unstructured":"Blanquero, R., Carrizosa, E., Molero-R\u00edo, C., & Morales, D. R. (2021). Optimal randomized classification trees. Computers & Operations Research, 132, 105281.","journal-title":"Computers & Operations Research"},{"issue":"1","key":"6123_CR19","first-page":"1","volume":"3","author":"S Boyd","year":"2011","unstructured":"Boyd, S., Parikh, N., Chu, E., Peleato, B., & Eckstein, J. (2011). Distributed optimization and statistical learning via the alternating direction method of multipliers. Foundations and Trends\u00ae in Machine Learning, 3(1), 1\u2013122.","journal-title":"Foundations and Trends\u00ae in Machine Learning"},{"issue":"1","key":"6123_CR20","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L. (2001). Random forests. Machine Learning, 45(1), 5\u201332.","journal-title":"Machine Learning"},{"key":"6123_CR21","volume-title":"Classification and regression trees","author":"L Breiman","year":"1984","unstructured":"Breiman, L., Friedman, J., Olshen, R., & Stone, C. (1984). Classification and regression trees. Monterey, CA: Wadsworth and Brooks."},{"key":"6123_CR22","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1016\/j.ins.2015.09.031","volume":"329","author":"E Carrizosa","year":"2016","unstructured":"Carrizosa, E., Nogales-G\u00f3mez, A., & Morales, D. R. (2016). Strongly agree or strongly disagree? Rating features in support vector machines. 
Information Sciences, 329, 256\u2013273.","journal-title":"Information Sciences"},{"issue":"1","key":"6123_CR23","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/s11750-021-00594-1","volume":"29","author":"E Carrizosa","year":"2021","unstructured":"Carrizosa, E., Molero-R\u00edo, C., & Morales, D. R. (2021). Mathematical optimization in classification and regression trees. Top, 29(1), 5\u201333.","journal-title":"Top"},{"key":"6123_CR24","first-page":"21","volume":"2011","author":"P Chen","year":"2012","unstructured":"Chen, P., Tsai, C., Chen, Y., Chou, K., et al. (2012). A linear ensemble of individual and blended models for music rating prediction. Proceedings of KDD-Cup, 2011, 21\u201360.","journal-title":"Proceedings of KDD-Cup"},{"key":"6123_CR25","doi-asserted-by":"crossref","unstructured":"Chen, T., & Guestrin, C. (2016). Xgboost: A scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining (pp. 785\u2013794).","DOI":"10.1145\/2939672.2939785"},{"key":"6123_CR26","unstructured":"Dua, D., & Graff, C. (2017). UCI machine learning repository. http:\/\/archive.ics.uci.edu\/ml."},{"issue":"3","key":"6123_CR27","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1007\/BF02592064","volume":"36","author":"M Duran","year":"1986","unstructured":"Duran, M., & Grossmann, I. (1986). An outer-approximation algorithm for a class of mixed-integer nonlinear programs. Mathematical Programming, 36(3), 307\u2013339.","journal-title":"Mathematical Programming"},{"issue":"2","key":"6123_CR28","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1214\/009053604000000067","volume":"32","author":"B Efron","year":"2004","unstructured":"Efron, B., Hastie, T., Johnstone, I., & Tibshirani, R. (2004). Least angle regression. The Annals of Statistics, 32(2), 407\u2013499.","journal-title":"The Annals of Statistics"},{"key":"6123_CR29","unstructured":"Efroymson, M. (1966). 
Stepwise regression\u2014A backward and forward look. In Eastern Regional Meetings of the Institute of Mathematical Statistics."},{"issue":"456","key":"6123_CR30","doi-asserted-by":"publisher","first-page":"1348","DOI":"10.1198\/016214501753382273","volume":"96","author":"J Fan","year":"2001","unstructured":"Fan, J., & Li, R. (2001). Variable selection via nonconcave penalized likelihood and its oracle properties. Journal of the American Statistical Association, 96(456), 1348\u20131360.","journal-title":"Journal of the American Statistical Association"},{"issue":"5","key":"6123_CR31","doi-asserted-by":"publisher","first-page":"849","DOI":"10.1111\/j.1467-9868.2008.00674.x","volume":"70","author":"J Fan","year":"2008","unstructured":"Fan, J., & Lv, J. (2008). Sure independence screening for ultrahigh dimensional feature space. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 70(5), 849\u2013911.","journal-title":"Journal of the Royal Statistical Society: Series B (Statistical Methodology)"},{"key":"6123_CR32","doi-asserted-by":"crossref","unstructured":"Fan, J., & Lv, J. (2018). Sure independence screening. Wiley StatsRef: Statistics Reference Online.","DOI":"10.1002\/9781118445112.stat08043"},{"issue":"6","key":"6123_CR33","doi-asserted-by":"publisher","first-page":"3567","DOI":"10.1214\/10-AOS798","volume":"38","author":"J Fan","year":"2010","unstructured":"Fan, J., & Song, R. (2010). Sure independence screening in generalized linear models with np-dimensionality. The Annals of Statistics, 38(6), 3567\u20133604.","journal-title":"The Annals of Statistics"},{"key":"6123_CR34","first-page":"2013","volume":"10","author":"J Fan","year":"2009","unstructured":"Fan, J., Samworth, R., & Wu, Y. (2009). Ultrahigh dimensional feature selection: Beyond the linear model. 
Journal of Machine Learning Research, 10, 2013\u20132038.","journal-title":"Journal of Machine Learning Research"},{"issue":"494","key":"6123_CR35","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1198\/jasa.2011.tm09779","volume":"106","author":"J Fan","year":"2011","unstructured":"Fan, J., Feng, Y., & Song, R. (2011). Nonparametric independence screening in sparse ultra-high-dimensional additive models. Journal of the American Statistical Association, 106(494), 544\u2013557.","journal-title":"Journal of the American Statistical Association"},{"issue":"1","key":"6123_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v033.i01","volume":"33","author":"J Friedman","year":"2010","unstructured":"Friedman, J., Hastie, T., & Tibshirani, R. (2010). Regularization paths for generalized linear models via coordinate descent. Journal of Statistical Software, 33(1), 1\u201322.","journal-title":"Journal of Statistical Software"},{"key":"6123_CR37","unstructured":"Friedman, J., Hastie, T., & Tibshirani, R. (2020). glmnet: Lasso and elastic-net regularized generalized linear models. R package version 4."},{"key":"6123_CR38","unstructured":"Gamarnik, D., & Zadik, I. (2017). High dimensional regression with binary coefficients. Estimating squared error and a phase transition. In: Conference on learning theory, PMLR (pp. 948\u2013953)."},{"issue":"3","key":"6123_CR39","doi-asserted-by":"publisher","first-page":"807","DOI":"10.1016\/j.ejor.2020.08.045","volume":"290","author":"C Gambella","year":"2021","unstructured":"Gambella, C., Ghaddar, B., & Naoum-Sawaya, J. (2021). Optimization problems for machine learning: A survey. European Journal of Operational Research, 290(3), 807\u2013828.","journal-title":"European Journal of Operational Research"},{"key":"6123_CR40","unstructured":"Gurobi Optimization Inc. (2016). Gurobi Optimizer Reference Manual. 
http:\/\/www.gurobi.com."},{"key":"6123_CR41","first-page":"1157","volume":"3","author":"I Guyon","year":"2003","unstructured":"Guyon, I., & Elisseeff, A. (2003). An introduction to variable and feature selection. Journal of Machine Learning Research, 3, 1157\u20131182.","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"6123_CR42","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1023\/A:1012487302797","volume":"46","author":"I Guyon","year":"2002","unstructured":"Guyon, I., Weston, J., Barnhill, S., & Vapnik, V. (2002). Gene selection for cancer classification using support vector machines. Machine learning, 46(1), 389\u2013422.","journal-title":"Machine learning"},{"key":"6123_CR43","doi-asserted-by":"publisher","DOI":"10.1201\/b18401","volume-title":"Statistical learning with sparsity: The lasso and generalizations","author":"T Hastie","year":"2015","unstructured":"Hastie, T., Tibshirani, R., & Wainwright, M. (2015). Statistical learning with sparsity: The lasso and generalizations. London: CRC Press."},{"issue":"5","key":"6123_CR44","doi-asserted-by":"publisher","first-page":"1517","DOI":"10.1287\/opre.2019.1919","volume":"68","author":"H Hazimeh","year":"2020","unstructured":"Hazimeh, H., & Mazumder, R. (2020). Fast best subset selection: Coordinate descent and local combinatorial optimization algorithms. Operations Research, 68(5), 1517\u20131537.","journal-title":"Operations Research"},{"key":"6123_CR45","doi-asserted-by":"crossref","unstructured":"Hazimeh, H., Mazumder, R., & Saab, A. (2020). Sparse regression at scale: Branch-and-bound rooted in first-order optimization. arXiv preprint arXiv:2004.06152.","DOI":"10.1007\/s10107-021-01712-4"},{"issue":"8","key":"6123_CR46","doi-asserted-by":"publisher","first-page":"832","DOI":"10.1109\/34.709601","volume":"20","author":"T Ho","year":"1998","unstructured":"Ho, T. (1998). The random subspace method for constructing decision forests. 
IEEE Transactions on Pattern Analysis and Machine Intelligence, 20(8), 832\u2013844.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"4","key":"6123_CR47","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1080\/00401706.1967.10490502","volume":"9","author":"R Hocking","year":"1967","unstructured":"Hocking, R., & Leslie, R. (1967). Selection of the best subset in regression analysis. Technometrics, 9(4), 531\u2013540.","journal-title":"Technometrics"},{"issue":"1","key":"6123_CR48","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1080\/00401706.1970.10488634","volume":"12","author":"AE Hoerl","year":"1970","unstructured":"Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: Biased estimation for nonorthogonal problems. Technometrics, 12(1), 55\u201367.","journal-title":"Technometrics"},{"key":"6123_CR49","unstructured":"Hu, X., Rudin, C., & Seltzer, M. (2019). Optimal sparse decision trees. Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"6123_CR50","unstructured":"Interpretable AI (2020). Interpretable AI Documentation. https:\/\/www.interpretable.ai."},{"key":"6123_CR51","unstructured":"Kenney, A., Chiaromonte, F., & Felici, G. (2018). Efficient and effective $$l_0$$ feature selection. arXiv preprint arXiv:1808.02526."},{"issue":"4","key":"6123_CR52","doi-asserted-by":"publisher","first-page":"981","DOI":"10.1007\/s10044-017-0655-2","volume":"20","author":"M Koziarski","year":"2017","unstructured":"Koziarski, M., Krawczyk, B., & Wo\u017aniak, M. (2017). The deterministic subspace method for constructing classifier ensembles. Pattern Analysis and Applications, 20(4), 981\u2013990.","journal-title":"Pattern Analysis and Applications"},{"issue":"6","key":"6123_CR53","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3136625","volume":"50","author":"J Li","year":"2017","unstructured":"Li, J., Cheng, K., Wang, S., Morstatter, F., et al. (2017). 
Feature selection: A data perspective. ACM Computing Surveys (CSUR), 50(6), 1\u201345.","journal-title":"ACM Computing Surveys (CSUR)"},{"issue":"1","key":"6123_CR54","first-page":"6691","volume":"18","author":"H Lian","year":"2017","unstructured":"Lian, H., & Fan, Z. (2017). Divide-and-conquer for debiased l1-norm support vector machine in ultra-high dimensions. The Journal of Machine Learning Research, 18(1), 6691\u20136716.","journal-title":"The Journal of Machine Learning Research"},{"issue":"81","key":"6123_CR55","first-page":"1","volume":"18","author":"W Liu","year":"2017","unstructured":"Liu, W., & Tsang, I. (2017). Making decision trees feasible in ultrahigh feature and label dimensions. The Journal of Machine Learning Research, 18(81), 1\u201336.","journal-title":"The Journal of Machine Learning Research"},{"key":"6123_CR56","doi-asserted-by":"crossref","unstructured":"Liu, W., Liu, Z., Tsang, I., Zhang, W., & Lin X. (2018). Doubly approximate nearest neighbor classification. In Proceedings of the AAAI conference on artificial intelligence (Vol.\u00a032).","DOI":"10.1609\/aaai.v32i1.11690"},{"key":"6123_CR57","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1016\/j.artmed.2019.04.004","volume":"96","author":"Z Liu","year":"2019","unstructured":"Liu, Z., Elashoff, D., & Piantadosi, S. (2019). Sparse support vector machines with l0 approximation for ultra-high dimensional omics data. Artificial intelligence in medicine, 96, 134\u2013141.","journal-title":"Artificial intelligence in medicine"},{"issue":"3","key":"6123_CR58","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1111\/insr.12016","volume":"82","author":"W-Y Loh","year":"2014","unstructured":"Loh, W.-Y. (2014). Fifty years of classification and regression trees. International Statistical Review, 82(3), 329\u2013348.","journal-title":"International Statistical Review"},{"key":"6123_CR59","doi-asserted-by":"publisher","unstructured":"McSherry, F., & Talwar, K. (2007). 
Mechanism design via differential privacy. In 48th Annual IEEE Symposium on Foundations of Computer Science (FOCS\u201907) (pp. 94\u2013103). https:\/\/doi.org\/10.1109\/FOCS.2007.66.","DOI":"10.1109\/FOCS.2007.66"},{"issue":"2","key":"6123_CR60","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1137\/S0097539792240406","volume":"24","author":"B Natarajan","year":"1995","unstructured":"Natarajan, B. (1995). Sparse approximate solutions to linear systems. SIAM Journal on Computing, 24(2), 227\u2013234.","journal-title":"SIAM Journal on Computing"},{"key":"6123_CR61","unstructured":"Ng, A. (1998). On feature selection: Learning with exponentially many irrelevant features as training examples. In: Proceedings of the fifteenth international conference on machine learning (pp. 404\u2013412). Morgan Kaufmann Publishers Inc."},{"issue":"3","key":"6123_CR62","doi-asserted-by":"publisher","first-page":"515","DOI":"10.1080\/10485252.2016.1167206","volume":"28","author":"L Ni","year":"2016","unstructured":"Ni, L., & Fang, F. (2016). Entropy-based model-free feature screening for ultrahigh-dimensional multiclass classification. Journal of Nonparametric Statistics, 28(3), 515\u2013530.","journal-title":"Journal of Nonparametric Statistics"},{"key":"6123_CR63","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., et al. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research, 12, 2825\u20132830.","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"6123_CR64","first-page":"8279","volume":"17","author":"B Peng","year":"2016","unstructured":"Peng, B., Wang, L., & Wu, Y. (2016). An error bound for l1-norm support vector machine coefficients in ultra-high dimension. 
The Journal of Machine Learning Research, 17(1), 8279\u20138304.","journal-title":"The Journal of Machine Learning Research"},{"issue":"1","key":"6123_CR65","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1007\/s10107-015-0894-1","volume":"151","author":"M Pilanci","year":"2015","unstructured":"Pilanci, M., Wainwright, M. J., & El Ghaoui, L. (2015). Sparse learning via Boolean relaxations. Mathematical Programming, 151(1), 63\u201387.","journal-title":"Mathematical Programming"},{"key":"6123_CR66","doi-asserted-by":"crossref","unstructured":"Radovanovi\u0107, M., Nanopoulos, A., & Ivanovi\u0107, M. (2009). Nearest neighbors in high-dimensional data: The emergence and influence of hubs. In Proceedings of the 26th Annual International Conference on Machine Learning (pp. 865\u2013872).","DOI":"10.1145\/1553374.1553485"},{"issue":"3","key":"6123_CR67","doi-asserted-by":"publisher","first-page":"660","DOI":"10.1016\/S0377-2217(01)00264-8","volume":"141","author":"M Redmond","year":"2002","unstructured":"Redmond, M., & Baveja, A. (2002). A data-driven software tool for enabling cooperative information sharing among police departments. European Journal of Operational Research, 141(3), 660\u2013678.","journal-title":"European Journal of Operational Research"},{"key":"6123_CR68","unstructured":"Reeves, G., Xu, J., & Zadik, I. (2019). The all-or-nothing phenomenon in sparse linear regression. In Conference on Learning Theory, PMLR (pp. 2652\u20132663)."},{"issue":"5","key":"6123_CR69","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1038\/s42256-019-0048-x","volume":"1","author":"C Rudin","year":"2019","unstructured":"Rudin, C. (2019). Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead. 
Nature Machine Intelligence, 1(5), 206\u2013215.","journal-title":"Nature Machine Intelligence"},{"issue":"2\u20133","key":"6123_CR70","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1016\/0010-4655(96)00062-8","volume":"96","author":"J Schneider","year":"1996","unstructured":"Schneider, J., Froschhammer, C., Morgenstern, I., Husslein, T., & Singer, J. (1996). Searching for backbones-an efficient parallel algorithm for the traveling salesman problem. Computer Physics Communications, 96(2\u20133), 173\u2013188.","journal-title":"Computer Physics Communications"},{"issue":"3","key":"6123_CR71","first-page":"262","volume":"10","author":"VG Sigillito","year":"1989","unstructured":"Sigillito, V. G., Wing, S. P., Hutton, L. V., & Baker, K. B. (1989). Classification of radar returns from the ionosphere using neural networks. Johns Hopkins APL Technical Digest, 10(3), 262\u2013266.","journal-title":"Johns Hopkins APL Technical Digest"},{"issue":"5","key":"6123_CR72","doi-asserted-by":"publisher","first-page":"947","DOI":"10.1111\/rssb.12095","volume":"77","author":"Q Song","year":"2015","unstructured":"Song, Q., & Liang, F. (2015). A split-and-merge Bayesian variable selection approach for ultrahigh dimensional regression. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 77(5), 947\u2013972.","journal-title":"Journal of the Royal Statistical Society: Series B (Statistical Methodology)"},{"issue":"1","key":"6123_CR73","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani, R. (1996). Regression shrinkage and selection via the lasso. 
Journal of the Royal Statistical Society: Series B (Methodological), 58(1), 267\u2013288.","journal-title":"Journal of the Royal Statistical Society: Series B (Methodological)"},{"issue":"3","key":"6123_CR74","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1007\/s10994-015-5528-6","volume":"102","author":"B Ustun","year":"2016","unstructured":"Ustun, B., & Rudin, C. (2016). Supersparse linear integer models for optimized medical scoring systems. Machine Learning, 102(3), 349\u2013391.","journal-title":"Machine Learning"},{"key":"6123_CR75","doi-asserted-by":"crossref","unstructured":"Verwer, S., & Zhang, Y. (2019). Learning optimal classification trees using a binary linear program formulation. In Proceedings of the AAAI conference on artificial intelligence (Vol. 33, pp. 1625\u20131632).","DOI":"10.1609\/aaai.v33i01.33011624"},{"key":"6123_CR76","unstructured":"Walsh, T., & Slaney, J. (2001). Backbones in optimization and approximation. In: Proceedings of the Seventeenth International Joint Conference on Artificial Intelligence, pp. 254\u2013259."},{"key":"6123_CR77","unstructured":"Wang, X., Dunson, D., & Leng, C. (2016). Decorrelated feature space partitioning for distributed sparse regression. In Advances in neural information processing systems (pp. 802\u2013810)."},{"key":"6123_CR78","doi-asserted-by":"crossref","unstructured":"Wang, Y., Shrivastava, A., Wang, J., & Ryu, J. (2018). Randomized algorithms accelerated over CPU\u2013GPU for ultra-high dimensional similarity search. In Proceedings of the 2018 international conference on management of data (pp. 889\u2013903).","DOI":"10.1145\/3183713.3196925"},{"issue":"23","key":"6123_CR79","doi-asserted-by":"publisher","first-page":"9193","DOI":"10.1073\/pnas.87.23.9193","volume":"87","author":"WH Wolberg","year":"1990","unstructured":"Wolberg, W. H., & Mangasarian, O. L. (1990). Multisurface method of pattern separation for medical diagnosis applied to breast cytology. 
Proceedings of the National Academy of Sciences, 87(23), 9193\u20139196.","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"4","key":"6123_CR80","doi-asserted-by":"publisher","first-page":"3359","DOI":"10.1137\/19M1245414","volume":"30","author":"W Xie","year":"2020","unstructured":"Xie, W., & Deng, X. (2020). Scalable algorithms for the sparse ridge regression. SIAM Journal on Optimization, 30(4), 3359\u20133386.","journal-title":"SIAM Journal on Optimization"},{"key":"6123_CR81","unstructured":"Xu, H., Caramanis, C., & Mannor, S. (2009). Robust regression and lasso. In Advances in Neural Information Processing Systems (pp. 1801\u20131808)."},{"key":"6123_CR82","unstructured":"Yang, J., Mahoney, M., Saunders, M., & Sun, Y. (2016). Feature-distributed sparse regression: a screen-and-clean approach. In Advances in neural information processing systems (pp. 2712\u20132720)."},{"key":"6123_CR83","doi-asserted-by":"crossref","unstructured":"Yang, W., Li, T., Fang, G., & Wei, H. (2020). Pase: Postgresql ultra-high-dimensional approximate nearest neighbor search extension. In Proceedings of the 2020 ACM SIGMOD international conference on management of data (pp. 2241\u20132253).","DOI":"10.1145\/3318464.3386131"},{"key":"6123_CR84","unstructured":"Zadik, I. (2019). Computational and statistical challenges in high dimensional statistical models. PhD thesis, Massachusetts Institute of Technology."},{"issue":"2","key":"6123_CR85","doi-asserted-by":"publisher","first-page":"894","DOI":"10.1214\/09-AOS729","volume":"38","author":"C Zhang","year":"2010","unstructured":"Zhang, C. (2010). Nearly unbiased variable selection under minimax concave penalty. The Annals of Statistics, 38(2), 894\u2013942.","journal-title":"The Annals of Statistics"},{"key":"6123_CR86","doi-asserted-by":"crossref","unstructured":"Zhang, J. (1992). Selecting typical instances in instance-based learning. In Machine learning proceedings 1992 (pp. 470\u2013479). 
Elsevier.","DOI":"10.1016\/B978-1-55860-247-2.50066-8"},{"key":"6123_CR87","unstructured":"Zhou, Y., Porwal, U., Zhang, C., Ngo, H., et\u00a0al. (2014). Parallel feature selection inspired by group testing. In Advances in neural information processing systems (pp. 3554\u20133562)."},{"issue":"2","key":"6123_CR88","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1111\/j.1467-9868.2005.00503.x","volume":"67","author":"H Zou","year":"2005","unstructured":"Zou, H., & Hastie, T. (2005). Regularization and variable selection via the elastic net. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 67(2), 301\u2013320.","journal-title":"Journal of the Royal Statistical Society: Series B (Statistical Methodology)"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-021-06123-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-021-06123-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-021-06123-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T18:49:38Z","timestamp":1726512578000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-021-06123-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,22]]},"references-count":88,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["6123"],"URL":"https:\/\/doi.org\/10.1007\/s10994-021-06123-2","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"
date-parts":[[2022,1,22]]},"assertion":[{"value":"3 May 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 October 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 November 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 January 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"No conflicts of interest or competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"Please email vvdig@mit.edu to request code for the proposed method.","order":6,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code availability"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}