{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,10]],"date-time":"2026-07-10T14:04:11Z","timestamp":1783692251633,"version":"3.55.0"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,10,4]],"date-time":"2016-10-04T00:00:00Z","timestamp":1475539200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2017,1]]},"DOI":"10.1007\/s10994-016-5592-6","type":"journal-article","created":{"date-parts":[[2016,10,4]],"date-time":"2016-10-04T21:53:50Z","timestamp":1475618030000},"page":"119-141","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":31,"title":["Optimal learning with Bernstein online aggregation"],"prefix":"10.1007","volume":"106","author":[{"given":"Olivier","family":"Wintenberger","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2016,10,4]]},"reference":[{"key":"5592_CR1","unstructured":"Abernethy, J., Agarwal, A., Bartlett, P. L., & Rakhlin, A. (2009). A stochastic view of optimal regret through minimax duality. In COLT."},{"key":"5592_CR2","doi-asserted-by":"crossref","first-page":"573","DOI":"10.1109\/TIT.2012.2212414","volume":"59","author":"A Agarwal","year":"2013","unstructured":"Agarwal, A., & Duchi, J. C. (2013). The generalization ability of online algorithms for dependent data. IEEE Transactions on Information Theory, 59, 573\u2013587.","journal-title":"IEEE Transactions on Information Theory"},{"key":"5592_CR3","doi-asserted-by":"crossref","first-page":"65","DOI":"10.2478\/demo-2013-0004","volume":"1","author":"P Alquier","year":"2013","unstructured":"Alquier, P., Li, X., & Wintenberger, O. (2013). Prediction of time series by statistical learning: General losses and fast rates. Dependence Modeling, 1, 65\u201393.","journal-title":"Dependence Modeling"},{"key":"5592_CR4","unstructured":"Audibert, J. Y., Munos, R., & Szepesvari, C. (2006). Use of variance estimation in the multi-armed bandit problem. In NIPS."},{"key":"5592_CR5","first-page":"41","volume-title":"Advances in neural information processing systems","author":"J-Y Audibert","year":"2007","unstructured":"Audibert, J.-Y. (2007). Progressive mixture rules are deviation suboptimal. In J. C. Platt, D. Koller, Y. Singer, & S. Roweis (Eds.), Advances in neural information processing systems (Vol. 20, pp. 41\u201348). Cambridge, MA: MIT Press."},{"key":"5592_CR6","doi-asserted-by":"crossref","first-page":"1591","DOI":"10.1214\/08-AOS623","volume":"37","author":"J-Y Audibert","year":"2009","unstructured":"Audibert, J.-Y. (2009). Fast learning rates in statistical inference through aggregation. The Annals of Statistics, 37, 1591\u20131646.","journal-title":"The Annals of Statistics"},{"key":"5592_CR7","doi-asserted-by":"crossref","unstructured":"Blum, A., & Mansour, Y. (2005). From external to internal regret. In Proceedings of the 18th annual conference on learning theory (New York) (pp.\u00a0621\u2013636). Springer.","DOI":"10.1007\/11503415_42"},{"key":"5592_CR8","doi-asserted-by":"crossref","unstructured":"Catoni, O. (2004). Statistical learning theory and stochastic optimization. Lecture notes in mathematics (Vol. 1851), Springer-Verlag, Berlin. Lecture notes from the 31st summer school on probability theory held in Saint-Flour, July 8\u201325, 2001. MR 2163920.","DOI":"10.1007\/b99352"},{"key":"5592_CR9","volume-title":"Pac-bayesian supervised classification: The thermodynamics of statistical learning","author":"O Catoni","year":"2007","unstructured":"Catoni, O. (2007). Pac-bayesian supervised classification: The thermodynamics of statistical learning. Beachwood, OH: Institute of Mathematical Statistics."},{"key":"5592_CR10","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511546921","volume-title":"Prediction, learning, and games","author":"N Cesa-Bianchi","year":"2006","unstructured":"Cesa-Bianchi, N., & Lugosi, G. (2006). Prediction, learning, and games. Cambridge, NY: Cambridge University Press."},{"key":"5592_CR11","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1007\/s10994-006-5001-7","volume":"66","author":"N Cesa-Bianchi","year":"2007","unstructured":"Cesa-Bianchi, N., Mansour, Y., & Stoltz, G. (2007). Improved second-order bounds for prediction with expert advice. Machine Learning, 66, 321\u2013352.","journal-title":"Machine Learning"},{"key":"5592_CR12","doi-asserted-by":"crossref","first-page":"1878","DOI":"10.1214\/12-AOS1025","volume":"40","author":"D Dai","year":"2012","unstructured":"Dai, D., Rigollet, P., Xia, L., & Zhang, T. (2012). Deviation optimal learning using greedy Q-aggregation. The Annals of Statistics, 40, 1878\u20131905.","journal-title":"The Annals of Statistics"},{"key":"5592_CR13","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/cpa.3160280102","volume":"28","author":"MD Donsker","year":"1975","unstructured":"Donsker, M. D., & Varadhan, S. S. (1975). Asymptotic evaluation of certain markov process expectations for large time, I. Communications on Pure and Applied Mathematics, 28, 1\u201347.","journal-title":"Communications on Pure and Applied Mathematics"},{"key":"5592_CR14","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1214\/aop\/1176996452","volume":"3","author":"DA Freedman","year":"1975","unstructured":"Freedman, D. A. (1975). On tail probabilities for martingales. The Annals of Probability, 3, 100\u2013118.","journal-title":"The Annals of Probability"},{"key":"5592_CR15","unstructured":"Gaillard, P., Stoltz, G., & Van Erven, T. (2014). A second-order bound with excess losses. In COLT. arXiv:1402.2044 ."},{"key":"5592_CR16","first-page":"729","volume":"14","author":"S Gerchinovitz","year":"2013","unstructured":"Gerchinovitz, S. (2013). Sparsity regret bounds for individual sequences in online linear regression. JMLR, 14, 729\u2013769.","journal-title":"JMLR"},{"key":"5592_CR17","doi-asserted-by":"crossref","first-page":"1906","DOI":"10.1109\/18.705569","volume":"44","author":"D Haussler","year":"1998","unstructured":"Haussler, D., Kivinen, J., & Warmuth, M. K. (1998). Sequential prediction of individual sequences under general loss functions. IEEE Transactions on Information Theory, 44, 1906\u20131925.","journal-title":"IEEE Transactions on Information Theory"},{"key":"5592_CR18","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1007\/s10994-010-5175-x","volume":"80","author":"E Hazan","year":"2010","unstructured":"Hazan, E., & Kale, S. (2010). Extracting certainty from uncertainty: Regret bounded by variation in costs. Machine Learning, 80, 165\u2013188.","journal-title":"Machine Learning"},{"key":"5592_CR19","doi-asserted-by":"crossref","first-page":"2183","DOI":"10.1214\/07-AOS546","volume":"36","author":"A Juditsky","year":"2008","unstructured":"Juditsky, A., Rigollet, P., & Tsybakov, A. B. (2008). Learning by mirror averaging. The Annals of Statistics, 36, 2183\u20132206.","journal-title":"The Annals of Statistics"},{"key":"5592_CR20","unstructured":"Kakade, S. M., & Tewari, A. (2008). On the generalization ability of online strongly convex programming algorithms. In NIPS."},{"key":"5592_CR21","unstructured":"Koolen, W., & Van Erven, T. (2015). Second-order quantile methods for experts and combinatorial games. In COLT (pp. 1155\u20131175)."},{"key":"5592_CR22","first-page":"591","volume":"145","author":"G Lecu\u00e9","year":"2009","unstructured":"Lecu\u00e9, G., & Mendelson, S. (2009). Aggregation via empirical risk minimization. PTRF, 145, 591\u2013613.","journal-title":"PTRF"},{"issue":"1","key":"5592_CR23","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1214\/13-AOS1190","volume":"42","author":"G Lecu\u00e9","year":"2014","unstructured":"Lecu\u00e9, G., & Rigollet, P. (2014). Optimal learning with Q-aggregation. The Annals of Statistics, 42(1), 211\u2013224.","journal-title":"The Annals of Statistics"},{"key":"5592_CR24","unstructured":"Luo, H., & Schapire, R. E. (2015). Achieving all with no parameters: Adanormalhedge. In Proceedings of the 28th conference on learning theory (pp.\u00a01286\u20131304)."},{"key":"5592_CR25","unstructured":"Maurer, A., & Pontil, M. (2009). Empirical bernstein bounds and sample variance penalization. In COLT."},{"key":"5592_CR26","first-page":"1","volume":"4","author":"M Mohri","year":"2010","unstructured":"Mohri, M., & Rostamizadeh, A. (2010). Stability bounds and for $$\\phi $$ \u03d5 -mixing and $$\\beta $$ \u03b2 -mixing processes. JMLR, 4, 1\u201326.","journal-title":"JMLR"},{"key":"5592_CR27","unstructured":"Nemirovski, A. (2000). Topics in non-parametric statistics. Lectures on probability theory and statistics (Saint-Flour, 1998), Lecture Notes in Math. (Vol. 1738, pp.\u00a085\u2013277). Berlin: Springer. MR 1775640."},{"issue":"2","key":"5592_CR28","doi-asserted-by":"crossref","first-page":"639","DOI":"10.1214\/11-AOS961","volume":"40","author":"P Rigollet","year":"2012","unstructured":"Rigollet, P. (2012). Kullback-Leibler aggregation and misspecified generalized linear models. The Annals of Statistics, 40(2), 639\u2013665.","journal-title":"The Annals of Statistics"},{"key":"5592_CR29","doi-asserted-by":"crossref","unstructured":"Tsybakov, A. B. (2003). Optimal rates of aggregation. In COLT. Berlin, Heidelberg: Springer.","DOI":"10.1007\/978-3-540-45167-9_23"},{"key":"5592_CR30","doi-asserted-by":"crossref","unstructured":"Vovk, V. G. (1990). Aggregating strategies. In COLT.","DOI":"10.1016\/B978-1-55860-146-8.50032-1"},{"key":"5592_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, T. (2005). Data dependent concentration bounds for sequential prediction algorithms. In Proceedings of the 18th annual conference on learning theory. Berlin, Heidelberg: Springer.","DOI":"10.1007\/11503415_12"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-016-5592-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-016-5592-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-016-5592-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,25]],"date-time":"2017-06-25T00:21:49Z","timestamp":1498350109000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-016-5592-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10,4]]},"references-count":31,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,1]]}},"alternative-id":["5592"],"URL":"https:\/\/doi.org\/10.1007\/s10994-016-5592-6","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,10,4]]}}}