{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T03:19:32Z","timestamp":1777000772167,"version":"3.51.4"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2019,2,4]],"date-time":"2019-02-04T00:00:00Z","timestamp":1549238400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2019,6]]},"DOI":"10.1007\/s10994-019-05787-1","type":"journal-article","created":{"date-parts":[[2019,2,4]],"date-time":"2019-02-04T21:03:29Z","timestamp":1549314209000},"page":"971-992","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":119,"title":["Accelerated gradient boosting"],"prefix":"10.1007","volume":"108","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8238-4471","authenticated-orcid":false,"given":"G.","family":"Biau","sequence":"first","affiliation":[]},{"given":"B.","family":"Cadre","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rouvi\u00e8re","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,2,4]]},"reference":[{"key":"5787_CR1","first-page":"2347","volume":"8","author":"PL Bartlett","year":"2007","unstructured":"Bartlett, P. L., & Traskin, M. (2007). AdaBoost is consistent. Journal of Machine Learning Research, 8, 2347\u20132368.","journal-title":"Journal of Machine Learning Research"},{"key":"5787_CR2","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1137\/080716542","volume":"2","author":"A Beck","year":"2009","unstructured":"Beck, A., & Teboulle, M. (2009). A fast iterative shrinkage-thresholding algorithm for linear inverse problems. SIAM Journal on Imaging Sciences, 2, 183\u2013202.","journal-title":"SIAM Journal on Imaging Sciences"},{"key":"5787_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1137\/090756855","volume":"4","author":"S Becker","year":"2011","unstructured":"Becker, S., Bobin, J., & Cand\u00e8s, E. J. (2011). NESTA: A fast and accurate first-order method for sparse recovery. SIAM Journal on Imaging Sciences, 4, 1\u201339.","journal-title":"SIAM Journal on Imaging Sciences"},{"key":"5787_CR4","unstructured":"Biau, G., & Cadre, B. (2017). Optimization by gradient boosting. arXiv:1707.05023 ."},{"key":"5787_CR5","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1016\/j.jmva.2015.04.007","volume":"146","author":"G Biau","year":"2016","unstructured":"Biau, G., Fischer, A., Guedj, B., & Malley, J. D. (2016). COBRA: A combined regression strategy. Journal of Multivariate Analysis, 146, 18\u201328.","journal-title":"Journal of Multivariate Analysis"},{"key":"5787_CR6","first-page":"705","volume":"7","author":"PJ Bickel","year":"2006","unstructured":"Bickel, P. J., Ritov, Y., & Zakai, A. (2006). Some theory for generalized boosting algorithms. Journal of Machine Learning Research, 7, 705\u2013732.","journal-title":"Journal of Machine Learning Research"},{"key":"5787_CR7","first-page":"861","volume":"4","author":"G Blanchard","year":"2003","unstructured":"Blanchard, G., Lugosi, G., & Vayatis, N. (2003). On the rate of convergence of regularized boosting classifiers. Journal of Machine Learning Research, 4, 861\u2013894.","journal-title":"Journal of Machine Learning Research"},{"key":"5787_CR8","unstructured":"Breiman, L. (1997). Arcing the edge. Technical Report 486, Statistics Department, University of California, Berkeley."},{"key":"5787_CR9","doi-asserted-by":"publisher","first-page":"801","DOI":"10.1214\/aos\/1024691079","volume":"26","author":"L Breiman","year":"1998","unstructured":"Breiman, L. (1998). Arcing classifiers (with discussion). The Annals of Statistics, 26, 801\u2013824.","journal-title":"The Annals of Statistics"},{"key":"5787_CR10","doi-asserted-by":"publisher","first-page":"1493","DOI":"10.1162\/089976699300016106","volume":"11","author":"L Breiman","year":"1999","unstructured":"Breiman, L. (1999). Prediction games and arcing algorithms. Neural Computation, 11, 1493\u20131517.","journal-title":"Neural Computation"},{"key":"5787_CR11","unstructured":"Breiman, L. (2000). Some infinite theory for predictor ensembles. Technical Report 577, Statistics Department, University of California, Berkeley."},{"key":"5787_CR12","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L. (2001). Random forests. Machine Learning, 45, 5\u201332.","journal-title":"Machine Learning"},{"key":"5787_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1214\/aos\/1079120126","volume":"32","author":"L Breiman","year":"2004","unstructured":"Breiman, L. (2004). Population theory for boosting ensembles. The Annals of Statistics, 32, 1\u201311.","journal-title":"The Annals of Statistics"},{"key":"5787_CR14","unstructured":"Bubeck, S. (2013). ORF523: Nesterov\u2019s accelerated gradient descent. https:\/\/blogs.princeton.edu\/imabandit\/2013\/04\/01\/acceleratedgradientdescent ."},{"key":"5787_CR15","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1561\/2200000050","volume":"8","author":"S Bubeck","year":"2015","unstructured":"Bubeck, S. (2015). Convex optimization: Algorithms and complexity. Foundations and Trends in Machine Learning, 8, 231\u2013357.","journal-title":"Foundations and Trends in Machine Learning"},{"key":"5787_CR16","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1214\/07-STS242","volume":"22","author":"P B\u00fchlmann","year":"2007","unstructured":"B\u00fchlmann, P., & Hothorn, T. (2007). Boosting algorithms: Regularization, prediction and model fitting (with discussion). Statistical Science, 22, 477\u2013505.","journal-title":"Statistical Science"},{"key":"5787_CR17","doi-asserted-by":"publisher","first-page":"324","DOI":"10.1198\/016214503000125","volume":"98","author":"P B\u00fchlmann","year":"2003","unstructured":"B\u00fchlmann, P., & Yu, B. (2003). Boosting with the $$L_2$$ L 2 loss: Regression and classification. Journal of the American Statistical Association, 98, 324\u2013339.","journal-title":"Journal of the American Statistical Association"},{"key":"5787_CR18","unstructured":"Chen, T. & Guestrin, C. (2016). XGBoost: A scalable tree boosting system. In Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining (pp. 785\u2013794). New York: ACM."},{"key":"5787_CR19","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/s10107-013-0677-5","volume":"146","author":"O Devolder","year":"2014","unstructured":"Devolder, O., Glineur, F., & Nesterov, Y. (2014). First-order methods of smooth convex optimization with inexact oracle. Mathematical Programming, 146, 37\u201375.","journal-title":"Mathematical Programming"},{"key":"5787_CR20","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-0711-5","volume-title":"A probabilistic theory of pattern recognition","author":"L Devroye","year":"1996","unstructured":"Devroye, L., Gy\u00f6rfi, L., & Lugosi, G. (1996). A probabilistic theory of pattern recognition. New York: Springer."},{"key":"5787_CR21","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1006\/inco.1995.1136","volume":"121","author":"Y Freund","year":"1995","unstructured":"Freund, Y. (1995). Boosting a weak learning algorithm by majority. Information and Computation, 121, 256\u2013285.","journal-title":"Information and Computation"},{"key":"5787_CR22","unstructured":"Freund, Y., & Schapire, R. E. (1996). Experiments with a new boosting algorithm. In S. Lorenza (Ed.), Machine learning: Proceedings of the thirteenth international conference on machine learning (pp. 148\u2013156). San Francisco: Morgan Kaufmann Publishers."},{"key":"5787_CR23","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1006\/jcss.1997.1504","volume":"55","author":"Y Freund","year":"1997","unstructured":"Freund, Y., & Schapire, R. E. (1997). A decision-theoretic generalization of on-line learning and an application to boosting. Journal of Computer and System Sciences, 55, 119\u2013139.","journal-title":"Journal of Computer and System Sciences"},{"key":"5787_CR24","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1214\/aos\/1016218223","volume":"28","author":"J Friedman","year":"2000","unstructured":"Friedman, J., Hastie, T., & Tibshirani, R. (2000). Additive logistic regression: A statistical view of boosting (with discussion). The Annals of Statistics, 28, 337\u2013374.","journal-title":"The Annals of Statistics"},{"key":"5787_CR25","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1214\/aos\/1013203451","volume":"29","author":"JH Friedman","year":"2001","unstructured":"Friedman, J. H. (2001). Greedy function approximation: A gradient boosting machine. The Annals of Statistics, 29, 1189\u20131232.","journal-title":"The Annals of Statistics"},{"key":"5787_CR26","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1016\/S0167-9473(01)00065-2","volume":"38","author":"JH Friedman","year":"2002","unstructured":"Friedman, J. H. (2002). Stochastic gradient boosting. Computational Statistics & Data Analysis, 38, 367\u2013378.","journal-title":"Computational Statistics & Data Analysis"},{"key":"5787_CR27","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-84858-7","volume-title":"The elements of statistical learning: Data mining, inference, and prediction","author":"T Hastie","year":"2009","unstructured":"Hastie, T., Tibshirani, R., & Friedman, J. (2009). The elements of statistical learning: Data mining, inference, and prediction (2nd ed.). New York: Springer.","edition":"2"},{"key":"5787_CR28","unstructured":"Jain, P., Netrapalli, P., Kakade, S. M., Kidambi, R., & Sidford, A. (2018). Accelerating stochastic gradient descent for least squares regression. In S.\u00a0Bubeck, V.\u00a0Perchet, & P.\u00a0Rigollet (Ed.), Proceedings of the 31st conference on learning theory (Vol. 75, pp. 545\u2013604). PMLR."},{"key":"5787_CR29","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1214\/009053604000000463","volume":"32","author":"G Lugosi","year":"2004","unstructured":"Lugosi, G., & Vayatis, N. (2004). On the Bayes-risk consistency of regularized boosting methods. The Annals of Statistics, 32, 30\u201355.","journal-title":"The Annals of Statistics"},{"key":"5787_CR30","unstructured":"Mason, L., Baxter, J., Bartlett, P., & Frean, M. (1999). Boosting algorithms as gradient descent. In S. A. Solla, T. K. Leen, & K.\u00a0M\u00fcller (Eds.), Proceedings of the 12th international conference on neural information processing systems (pp. 512\u2013518). Cambridge, MA: The MIT Press."},{"key":"5787_CR31","doi-asserted-by":"crossref","first-page":"221","DOI":"10.7551\/mitpress\/1113.003.0017","volume-title":"Advances in large margin classifiers","author":"L Mason","year":"2000","unstructured":"Mason, L., Baxter, J., Bartlett, P., & Frean, M. (2000). Functional gradient techniques for combining hypotheses. In A. J. Smola, P. L. Bartlett, B. Sch\u00f6lkopf, & D. Schuurmans (Eds.), Advances in large margin classifiers (pp. 221\u2013246). Cambridge, MA: The MIT Press."},{"key":"5787_CR32","first-page":"372","volume":"27","author":"Y Nesterov","year":"1983","unstructured":"Nesterov, Y. (1983). A method of solving a convex programming problem with convergence rate $${\\rm O}(1\/k^2)$$ O ( 1 \/ k 2 ) . Soviet Mathematics Doklady, 27, 372\u2013376.","journal-title":"Soviet Mathematics Doklady"},{"key":"5787_CR33","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-8853-9","volume-title":"Introductory lectures on convex optimization: A basic course","author":"Y Nesterov","year":"2004","unstructured":"Nesterov, Y. (2004). Introductory lectures on convex optimization: A basic course. New York: Springer."},{"key":"5787_CR34","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/s10107-004-0552-5","volume":"103","author":"Y Nesterov","year":"2005","unstructured":"Nesterov, Y. (2005). Smooth minimization of non-smooth functions. Mathematical Programming, 103, 127\u2013152.","journal-title":"Mathematical Programming"},{"key":"5787_CR35","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/s10107-012-0629-5","volume":"140","author":"Y Nesterov","year":"2013","unstructured":"Nesterov, Y. (2013). Gradient methods for minimizing composite functions. Mathematical Programming, 140, 125\u2013161.","journal-title":"Mathematical Programming"},{"key":"5787_CR36","unstructured":"Qu, G., & Li, N. (2016). Accelerated distributed Nesterov gradient descent. In 54th Annual Allerton conference on communication, control, and computing (pp. 209\u2013216). Red Hook: Curran Associates, Inc."},{"key":"5787_CR37","unstructured":"Ridgeway, G. (2007). Generalized boosted models: A guide to the gbm package. http:\/\/www.saedsayad.com\/docs\/gbm2.pdf ."},{"key":"5787_CR38","first-page":"197","volume":"5","author":"RE Schapire","year":"1990","unstructured":"Schapire, R. E. (1990). The strength of weak learnability. Machine Learning, 5, 197\u2013227.","journal-title":"Machine Learning"},{"key":"5787_CR39","first-page":"1","volume":"17","author":"W Su","year":"2016","unstructured":"Su, W., Boyd, S., & Cand\u00e8s, E. J. (2016). A differential equation for modeling Nesterov\u2019s accelerated gradient method: Theory and insights. Journal of Machine Learning Research, 17, 1\u201343.","journal-title":"Journal of Machine Learning Research"},{"key":"5787_CR40","unstructured":"Sutskever, I., Martens, J., Dahl, G., & Hinton, G. (2013). On the importance of initialization and momentum in deep learning. In S.\u00a0Dasgupta & D.\u00a0McAllester (Eds.), Proceedings of the 30th international conference on machine learning, proceedings of machine learning research (pp. 1139\u20131147)."},{"key":"5787_CR41","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani, R. (1996). Regression shrinkage and selection via the Lasso. Journal of the Royal Statistical Society Series B, 58, 267\u2013288.","journal-title":"Journal of the Royal Statistical Society Series B"},{"key":"5787_CR42","unstructured":"Tseng, P. (2008). On accelerated proximal gradient methods for convex-concave optimization. http:\/\/www.mit.edu\/~dimitrib\/PTseng\/papers\/apgm.pdf ."},{"key":"5787_CR43","doi-asserted-by":"publisher","first-page":"1538","DOI":"10.1214\/009053605000000255","volume":"33","author":"T Zhang","year":"2005","unstructured":"Zhang, T., & Yu, B. (2005). Boosting with early stopping: Convergence and consistency. The Annals of Statistics, 33, 1538\u20131579.","journal-title":"The Annals of Statistics"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-019-05787-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-019-05787-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-019-05787-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,14]],"date-time":"2024-07-14T14:50:04Z","timestamp":1720968604000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-019-05787-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2,4]]},"references-count":43,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2019,6]]}},"alternative-id":["5787"],"URL":"https:\/\/doi.org\/10.1007\/s10994-019-05787-1","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,2,4]]},"assertion":[{"value":"5 March 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 January 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 February 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}