{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T19:11:42Z","timestamp":1767899502267,"version":"3.49.0"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2020,1,24]],"date-time":"2020-01-24T00:00:00Z","timestamp":1579824000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,24]],"date-time":"2020-01-24T00:00:00Z","timestamp":1579824000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001711","name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","doi-asserted-by":"crossref","award":["P2EZP2 165226"],"award-info":[{"award-number":["P2EZP2 165226"]}],"id":[{"id":"10.13039\/501100001711","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1007\/s10994-020-05868-6","type":"journal-article","created":{"date-parts":[[2020,1,24]],"date-time":"2020-01-24T19:04:11Z","timestamp":1579892651000},"page":"973-997","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Sparse hierarchical regression with polynomials"],"prefix":"10.1007","volume":"109","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1985-1003","authenticated-orcid":false,"given":"Dimitris","family":"Bertsimas","sequence":"first","affiliation":[]},{"given":"Bart","family":"Van Parys","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,1,24]]},"reference":[{"issue":"3","key":"5868_CR1","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1080\/00031305.1992.10475879","volume":"46","author":"NS Altman","year":"1992","unstructured":"Altman, N. S. (1992). An introduction to kernel and nearest-neighbor nonparametric regression. The American Statistician, 46(3), 175\u2013185.","journal-title":"The American Statistician"},{"issue":"Jun","key":"5868_CR2","first-page":"1179","volume":"9","author":"F Bach","year":"2008","unstructured":"Bach, F. (2008). Consistency of the group Lasso and multiple kernel learning. Journal of Machine Learning Research, 9(Jun), 1179\u20131225.","journal-title":"Journal of Machine Learning Research"},{"key":"5868_CR3","unstructured":"Bach, F. (2009). Exploring large feature spaces with hierarchical multiple kernel learning. In Advances in neural information processing systems (pp. 105\u2013112)."},{"issue":"3","key":"5868_CR4","doi-asserted-by":"crossref","first-page":"316","DOI":"10.1287\/opre.46.3.316","volume":"46","author":"C Barnhart","year":"1998","unstructured":"Barnhart, C., Johnson, E., Nemhauser, G., Savelsbergh, M., & Vance, P. (1998). Branch-and-price: Column generation for solving huge integer programs. Operations Research, 46(3), 316\u2013329.","journal-title":"Operations Research"},{"key":"5868_CR5","doi-asserted-by":"crossref","first-page":"931","DOI":"10.1016\/j.ejor.2017.03.051","volume":"270","author":"D Bertsimas","year":"2018","unstructured":"Bertsimas, D., & Copenhaver, M. (2018). Characterization of the equivalence of robustification and regularization in linear and matrix regression. European Journal of Operational Research, 270, 931\u2013942.","journal-title":"European Journal of Operational Research"},{"issue":"2","key":"5868_CR6","doi-asserted-by":"crossref","first-page":"813","DOI":"10.1214\/15-AOS1388","volume":"44","author":"D Bertsimas","year":"2016","unstructured":"Bertsimas, D., King, A., & Mazumder, R. (2016). Best subset selection via a modern optimization lens. Annals of Statistics, 44(2), 813\u2013852.","journal-title":"Annals of Statistics"},{"key":"5868_CR7","unstructured":"Bertsimas, D., & Van\u00a0Parys, B. (2017). Sparse high-dimensional regression: Exact scalable algorithms and phase transitions. Submitted to the Annals of Statistics. https:\/\/arxiv.org\/abs\/1709.10029."},{"key":"5868_CR8","doi-asserted-by":"crossref","DOI":"10.1201\/9781315139470","volume-title":"Classification and regression trees","author":"L Breiman","year":"2017","unstructured":"Breiman, L. (2017). Classification and regression trees. London: Routledge."},{"key":"5868_CR9","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-20192-9","volume-title":"Statistics for high-dimensional data: Methods, theory and applications","author":"P B\u00fchlmann","year":"2011","unstructured":"B\u00fchlmann, P., & van de Geer, S. (2011). Statistics for high-dimensional data: Methods, theory and applications. Berlin: Springer."},{"issue":"2","key":"5868_CR10","doi-asserted-by":"crossref","first-page":"489","DOI":"10.1109\/TIT.2005.862083","volume":"52","author":"E Cand\u00e8s","year":"2006","unstructured":"Cand\u00e8s, E., Romberg, J., & Tao, T. (2006). Robust uncertainty principles: Exact signal reconstruction from highly incomplete frequency information. IEEE Transactions on Information Theory, 52(2), 489\u2013509.","journal-title":"IEEE Transactions on Information Theory"},{"key":"5868_CR11","unstructured":"Donoho, D., & Stodden, V. (2006). Breakdown point of model selection when the number of variables exceeds the number of observations. In International joint conference on neural networks (pp. 1916\u20131921). IEEE."},{"issue":"Dec","key":"5868_CR12","first-page":"2153","volume":"6","author":"P Drineas","year":"2005","unstructured":"Drineas, P., & Mahoney, M. (2005). On the Nystr\u00f6m method for approximating a gram matrix for improved kernel-based learning. Journal of Machine Learning Research, 6(Dec), 2153\u20132175.","journal-title":"Journal of Machine Learning Research"},{"issue":"3","key":"5868_CR13","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1007\/BF02592064","volume":"36","author":"M Duran","year":"1986","unstructured":"Duran, M., & Grossmann, I. (1986). An outer-approximation algorithm for a class of mixed-integer nonlinear programs. Mathematical Programming, 36(3), 307\u2013339.","journal-title":"Mathematical Programming"},{"issue":"5","key":"5868_CR14","doi-asserted-by":"crossref","first-page":"849","DOI":"10.1111\/j.1467-9868.2008.00674.x","volume":"70","author":"J Fan","year":"2008","unstructured":"Fan, J., & Lv, J. (2008). Sure independence screening for ultrahigh dimensional feature space. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 70(5), 849\u2013911.","journal-title":"Journal of the Royal Statistical Society: Series B (Statistical Methodology)"},{"issue":"1","key":"5868_CR15","first-page":"101","volume":"20","author":"J Fan","year":"2010","unstructured":"Fan, J., & Lv, J. (2010). A selective overview of variable selection in high dimensional feature space. Statistica Sinica, 20(1), 101.","journal-title":"Statistica Sinica"},{"issue":"1","key":"5868_CR16","doi-asserted-by":"crossref","first-page":"327","DOI":"10.1007\/BF01581153","volume":"66","author":"R Fletcher","year":"1994","unstructured":"Fletcher, R., & Leyffer, S. (1994). Solving mixed integer nonlinear programs by outer approximation. Mathematical Programming, 66(1), 327\u2013349.","journal-title":"Mathematical Programming"},{"key":"5868_CR17","unstructured":"Gamarnik, D., & Zadik, I. (2017). High-dimensional regression with binary coefficients. Estimating squared error and a phase transition. https:\/\/arxiv.org\/abs\/1701.04455."},{"issue":"Mar","key":"5868_CR18","first-page":"1157","volume":"3","author":"I Guyon","year":"2003","unstructured":"Guyon, I., & Elisseeff, A. (2003). An introduction to variable and feature selection. Journal of Machine Learning Research, 3(Mar), 1157\u20131182.","journal-title":"Journal of Machine Learning Research"},{"key":"5868_CR19","doi-asserted-by":"crossref","first-page":"694","DOI":"10.1016\/j.csda.2012.10.010","volume":"71","author":"P Hall","year":"2014","unstructured":"Hall, P., & Xue, J. H. (2014). On selecting interacting features from high-dimensional data. Computational Statistics & Data Analysis, 71, 694\u2013708.","journal-title":"Computational Statistics & Data Analysis"},{"issue":"507","key":"5868_CR20","doi-asserted-by":"crossref","first-page":"1285","DOI":"10.1080\/01621459.2014.881741","volume":"109","author":"N Hao","year":"2014","unstructured":"Hao, N., & Zhang, H. (2014). Interaction screening for ultrahigh-dimensional data. Journal of the American Statistical Association, 109(507), 1285\u20131301.","journal-title":"Journal of the American Statistical Association"},{"key":"5868_CR21","doi-asserted-by":"crossref","DOI":"10.1201\/b18401","volume-title":"Statistical learning with sparsity: The lasso and generalizations","author":"T Hastie","year":"2015","unstructured":"Hastie, T., Tibshirani, R., & Wainwright, M. (2015). Statistical learning with sparsity: The lasso and generalizations. Boca Raton: CRC Press."},{"issue":"1","key":"5868_CR22","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1080\/00401706.1970.10488634","volume":"12","author":"A Hoerl","year":"1970","unstructured":"Hoerl, A., & Kennard, R. (1970). Ridge regression: Biased estimation for nonorthogonal problems. Technometrics, 12(1), 55\u201367.","journal-title":"Technometrics"},{"key":"5868_CR23","unstructured":"Huang, L., Jia, J., Yu, B., Chun, B. G., Maniatis, P., & Naik, M. (2010). Predicting execution time of computer programs using sparse polynomial regression. In Advances in neural information processing systems (pp. 883\u2013891)."},{"issue":"2","key":"5868_CR24","doi-asserted-by":"crossref","first-page":"897","DOI":"10.1214\/16-AOS1474","volume":"45","author":"Y Kong","year":"2017","unstructured":"Kong, Y., Li, D., Fan, Y., Lv, J., et al. (2017). Interaction pursuit in high-dimensional multi-response regression via distance correlation. The Annals of Statistics, 45(2), 897\u2013922.","journal-title":"The Annals of Statistics"},{"key":"5868_CR25","unstructured":"Kpotufe, S. (2011). k-NN regression adapts to local intrinsic dimension. In Advances in neural information processing systems (pp. 729\u2013737)."},{"issue":"2","key":"5868_CR26","doi-asserted-by":"crossref","first-page":"238","DOI":"10.1287\/ijoc.2014.0623","volume":"27","author":"M Lubin","year":"2015","unstructured":"Lubin, M., & Dunning, I. (2015). Computing in operations research using Julia. INFORMS Journal on Computing, 27(2), 238\u2013248.","journal-title":"INFORMS Journal on Computing"},{"issue":"12","key":"5868_CR27","doi-asserted-by":"crossref","first-page":"3397","DOI":"10.1109\/78.258082","volume":"41","author":"S Mallat","year":"1993","unstructured":"Mallat, S., & Zhang, Z. (1993). Matching pursuits with time-frequency dictionaries. IEEE Transactions on Signal Processing, 41(12), 3397\u20133415.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"5868_CR28","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1098\/rsta.1909.0016","volume":"209","author":"J Mercer","year":"1909","unstructured":"Mercer, J. (1909). Functions of positive and negative type, and their connection with the theory of integral equations. Philosophical Transactions of the Royal Society of London, 209, 415\u2013446.","journal-title":"Philosophical Transactions of the Royal Society of London"},{"key":"5868_CR29","doi-asserted-by":"crossref","DOI":"10.1201\/9781420035933","volume-title":"Subset selection in regression","author":"A Miller","year":"2002","unstructured":"Miller, A. (2002). Subset selection in regression. Boca Raton: Chapman and Hall\/CRC."},{"key":"5868_CR30","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611970791","volume-title":"Interior-point polynomial algorithms in convex programming","author":"Y Nesterov","year":"1994","unstructured":"Nesterov, Y., & Nemirovskii, A. (1994). Interior-point polynomial algorithms in convex programming. Philadelphia: SIAM."},{"key":"5868_CR31","unstructured":"Pelckmans, K., Suykens, J., Van\u00a0Gestel, T., De\u00a0Brabanter, J., Lukas, L., Hamers, B., De\u00a0Moor, B., & Vandewalle, J. (2002). LS-SVMlab: A Matlab\/C toolbox for least squares support vector machines. Technical report, K.U.Leuven"},{"issue":"4","key":"5868_CR32","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1007\/BF02281970","volume":"19","author":"T Poggio","year":"1975","unstructured":"Poggio, T. (1975). On optimal nonlinear associative recall. Biological Cybernetics, 19(4), 201\u2013209.","journal-title":"Biological Cybernetics"},{"key":"5868_CR33","volume-title":"Learning with kernels: Support vector machines, regularization, optimization, and beyond","author":"B Sch\u00f6lkopf","year":"2002","unstructured":"Sch\u00f6lkopf, B., & Smola, A. (2002). Learning with kernels: Support vector machines, regularization, optimization, and beyond. Cambridge: MIT press."},{"issue":"1\/2","key":"5868_CR34","doi-asserted-by":"crossref","first-page":"1","DOI":"10.2307\/2331929","volume":"12","author":"K Smith","year":"1918","unstructured":"Smith, K. (1918). On the standard deviations of adjusted and interpolated values of an observed polynomial function and its constants and the guidance they give towards a proper choice of the distribution of observations. Biometrika, 12(1\/2), 1\u201385.","journal-title":"Biometrika"},{"issue":"5","key":"5868_CR35","doi-asserted-by":"crossref","first-page":"237","DOI":"10.2307\/3029337","volume":"21","author":"M Stone","year":"1948","unstructured":"Stone, M. (1948). The generalized Weierstrass approximation theorem. Mathematics Magazine, 21(5), 237\u2013254.","journal-title":"Mathematics Magazine"},{"issue":"3","key":"5868_CR36","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1023\/A:1018628609742","volume":"9","author":"J Suykens","year":"1999","unstructured":"Suykens, J., & Vandewalle, J. (1999). Least squares support vector machine classifiers. Neural Processing Letters, 9(3), 293\u2013300.","journal-title":"Neural Processing Letters"},{"issue":"1","key":"5868_CR37","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani, R. (1996). Regression shrinkage and selection via the lasso. Journal of the Royal Statistical Society: Series B (Methodological), 58(1), 267\u2013288.","journal-title":"Journal of the Royal Statistical Society: Series B (Methodological)"},{"issue":"5","key":"5868_CR38","first-page":"195","volume":"39","author":"A Tikhonov","year":"1943","unstructured":"Tikhonov, A. (1943). On the stability of inverse problems. Doklady Akademii Nauk SSSR, 39(5), 195\u2013198.","journal-title":"Doklady Akademii Nauk SSSR"},{"key":"5868_CR39","doi-asserted-by":"crossref","unstructured":"Vapnik, V. (1998). The support vector method of function estimation. In Nonlinear modeling (pp. 55\u201385), Springer.","DOI":"10.1007\/978-1-4615-5703-6_3"},{"key":"5868_CR40","volume-title":"The nature of statistical learning theory","author":"V Vapnik","year":"2013","unstructured":"Vapnik, V. (2013). The nature of statistical learning theory. Berlin: Springer."},{"issue":"5","key":"5868_CR41","doi-asserted-by":"crossref","first-page":"2183","DOI":"10.1109\/TIT.2009.2016018","volume":"55","author":"M Wainwright","year":"2009","unstructured":"Wainwright, M. (2009). Sharp thresholds for high-dimensional and noisy sparsity recovery using-constrained quadratic programming (Lasso). IEEE Transactions on Information Theory, 55(5), 2183\u20132202.","journal-title":"IEEE Transactions on Information Theory"},{"key":"5868_CR42","doi-asserted-by":"crossref","first-page":"3468","DOI":"10.1214\/07-AOS584","volume":"37","author":"P Zhao","year":"2009","unstructured":"Zhao, P., Rocha, G., & Yu, B. (2009). The composite absolute penalties family for grouped and hierarchical variable selection. The Annals of Statistics, 37, 3468\u20133497.","journal-title":"The Annals of Statistics"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-020-05868-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-020-05868-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-020-05868-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,30]],"date-time":"2024-07-30T17:58:18Z","timestamp":1722362298000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-020-05868-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,1,24]]},"references-count":42,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2020,5]]}},"alternative-id":["5868"],"URL":"https:\/\/doi.org\/10.1007\/s10994-020-05868-6","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,1,24]]},"assertion":[{"value":"29 May 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 September 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 January 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}