{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T07:05:40Z","timestamp":1758265540653,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":24,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662448472"},{"type":"electronic","value":"9783662448489"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-662-44848-9_8","type":"book-chapter","created":{"date-parts":[[2014,9,1]],"date-time":"2014-09-01T01:42:21Z","timestamp":1409535741000},"page":"115-131","source":"Crossref","is-referenced-by-count":7,"title":["Sub-sampling for Multi-armed Bandits"],"prefix":"10.1007","author":[{"given":"Akram","family":"Baransi","sequence":"first","affiliation":[]},{"given":"Odalric-Ambrym","family":"Maillard","sequence":"additional","affiliation":[]},{"given":"Shie","family":"Mannor","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"4","key":"8_CR1","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.2307\/1427934","volume":"27","author":"R. Agrawal","year":"1995","unstructured":"Agrawal, R.: Sample mean based index policies with O(log n) regret for the multi-armed bandit problem. Advances in Applied Probability\u00a027(4), 1054\u20131078 (1995)","journal-title":"Advances in Applied Probability"},{"key":"8_CR2","unstructured":"Agrawal, S., Goyal, N.: Further optimal regret bounds for thompson sampling. In: International Conference on Artificial Intelligence and Statistics, Scottsdale, AZ, US. JMLR W&CP, vol.\u00a031 (2013)"},{"key":"8_CR3","unstructured":"Audibert, J.-Y., Bubeck, S.: Minimax policies for adversarial and stochastic bandits"},{"key":"8_CR4","doi-asserted-by":"publisher","first-page":"1876","DOI":"10.1016\/j.tcs.2009.01.016","volume":"410","author":"J.-Y. Audibert","year":"2009","unstructured":"Audibert, J.-Y., Munos, R., Szepesv\u00e1ri, C.: Exploration-exploitation trade-off using variance estimates in multi-armed bandits. Theoretical Computer Science\u00a0410, 1876\u20131902 (2009)","journal-title":"Theoretical Computer Science"},{"key":"8_CR5","first-page":"397","volume":"3","author":"P. Auer","year":"2003","unstructured":"Auer, P.: Using confidence bounds for exploitation-exploration trade-offs. Journal of Machine Learning Research\u00a03, 397\u2013422 (2003)","journal-title":"Journal of Machine Learning Research"},{"issue":"1-2","key":"8_CR6","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1007\/s10998-010-3055-6","volume":"61","author":"P. Auer","year":"2010","unstructured":"Auer, P., Ortner, R.: UCB revisited: Improved regret bounds for the stochastic multi-armed bandit problem. Periodica Mathematica Hungarica\u00a061(1-2), 55\u201365 (2010)","journal-title":"Periodica Mathematica Hungarica"},{"key":"8_CR7","first-page":"967","volume":"18","author":"P.J. Bickel","year":"2008","unstructured":"Bickel, P.J., Sakov, A.: On the choice of m in the m out of n bootstrap and confidence bounds for extrema. Statistica Sinica\u00a018, 967\u2013985 (2008)","journal-title":"Statistica Sinica"},{"issue":"2","key":"8_CR8","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1006\/aama.1996.0007","volume":"17","author":"A.N. Burnetas","year":"1996","unstructured":"Burnetas, A.N., Katehakis, M.N.: Optimal adaptive policies for sequential allocation problems. Adv. Appl. Math.\u00a017(2), 122\u2013142 (1996)","journal-title":"Adv. Appl. Math."},{"issue":"3","key":"8_CR9","doi-asserted-by":"publisher","first-page":"1516","DOI":"10.1214\/13-AOS1119","volume":"41","author":"O. Capp\u00e9","year":"2013","unstructured":"Capp\u00e9, O., Garivier, A., Maillard, O.-A., Munos, R., Stoltz, G.: Kullback\u2013leibler upper confidence bounds for optimal sequential allocation. Ann. Statist.\u00a041(3), 1516\u20131541 (2013)","journal-title":"Ann. Statist."},{"issue":"4","key":"8_CR10","doi-asserted-by":"publisher","first-page":"829","DOI":"10.2307\/1427104","volume":"19","author":"F. Chang","year":"1987","unstructured":"Chang, F., Lai, T.L.: Optimal stopping and dynamic allocation. Advances in Applied Probability\u00a019(4), 829\u2013853 (1987)","journal-title":"Advances in Applied Probability"},{"key":"8_CR11","unstructured":"Garivier, A., Capp\u00e9, O.: The KL-UCB algorithm for bounded stochastic bandits and beyond. In: Proceedings of the 24th annual Conference on Learning Theory, COLT 2011 (2011)"},{"issue":"2","key":"8_CR12","doi-asserted-by":"crossref","first-page":"148","DOI":"10.1111\/j.2517-6161.1979.tb01068.x","volume":"41","author":"J.C. Gittins","year":"1979","unstructured":"Gittins, J.C.: Bandit processes and dynamic allocation indices. Journal of the Royal Statistical Society. Series B (Methodological)\u00a041(2), 148\u2013177 (1979)","journal-title":"Journal of the Royal Statistical Society. Series B (Methodological)"},{"issue":"3","key":"8_CR13","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1093\/biomet\/66.3.561","volume":"66","author":"J.C. Gittins","year":"1979","unstructured":"Gittins, J.C., Jones, D.M.: A dynamic allocation index for the discounted multiarmed bandit problem. Biometrika\u00a066(3), 561\u2013565 (1979)","journal-title":"Biometrika"},{"key":"8_CR14","unstructured":"Gittins, J.C., Weber, R., Glazebrook, K.: Multi-armed Bandit Allocation Indices. Wiley (1989)"},{"key":"8_CR15","unstructured":"Honda, J., Takemura, A.: An asymptotically optimal bandit algorithm for bounded support models, pp. 67\u201379"},{"key":"8_CR16","series-title":"LNAI","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1007\/978-3-642-34106-9_18","volume-title":"Algorithmic Learning Theory","author":"E. Kaufmann","year":"2012","unstructured":"Kaufmann, E., Korda, N., Munos, R.: Thompson sampling: an asymptotically optimal finite-time analysis. In: Bshouty, N.H., Stoltz, G., Vayatis, N., Zeugmann, T. (eds.) ALT 2012. LNCS (LNAI), vol.\u00a07568, pp. 199\u2013213. Springer, Heidelberg (2012)"},{"key":"8_CR17","unstructured":"Korda, N., Kaufmann, E., Munos, R.: Thompson sampling for 1-dimensional exponential family bandits. In: Burges, C.J.C., Bottou, L., Ghahramani, Z., Weinberger, K.Q. (eds.) NIPS, Lake Tahoe, Nevada, United States, vol.\u00a026, pp. 1448\u20131456 (2013)"},{"issue":"1","key":"8_CR18","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/0196-8858(85)90002-8","volume":"6","author":"T.L. Lai","year":"1985","unstructured":"Lai, T.L., Robbins, H.: Asymptotically efficient adaptive allocation rules. Advances in Applied Mathematics\u00a06(1), 4\u201322 (1985)","journal-title":"Advances in Applied Mathematics"},{"key":"8_CR19","unstructured":"Maillard, O.-A., Munos, R., Stoltz, G.: Finite-time analysis of multi-armed bandits problems with kullback-leibler divergences. In: Proceedings of the 24th Annual Conference on Learning Theory, COLT 2011 (2011)"},{"key":"8_CR20","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1090\/S0002-9904-1952-09620-8","volume":"58","author":"H. Robbins","year":"1952","unstructured":"Robbins, H.: Some aspects of the sequential design of experiments. Bulletin of the American Mathematics Society\u00a058, 527\u2013535 (1952)","journal-title":"Bulletin of the American Mathematics Society"},{"issue":"6","key":"8_CR21","doi-asserted-by":"publisher","first-page":"2798","DOI":"10.1214\/12-AOS1051","volume":"40","author":"J.P. Romano","year":"2012","unstructured":"Romano, J.P., Shaikh, A.M.: On the uniform asymptotic validity of subsampling and the bootstrap. The Annals of Statistics\u00a040(6), 2798\u20132822 (2012)","journal-title":"The Annals of Statistics"},{"issue":"1","key":"8_CR22","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1214\/aos\/1176342611","volume":"2","author":"R.J. Serfling","year":"1974","unstructured":"Serfling, R.J.: Probability inequalities for the sum in sampling without replacement. The Annals of Statistics\u00a02(1), 39\u201348 (1974)","journal-title":"The Annals of Statistics"},{"key":"8_CR23","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1093\/biomet\/25.3-4.285","volume":"25","author":"W.R. Thompson","year":"1933","unstructured":"Thompson, W.R.: On the likelihood that one unknown probability exceeds another in view of the evidence of two samples. Biometrika\u00a025, 285\u2013294 (1933)","journal-title":"Biometrika"},{"key":"8_CR24","doi-asserted-by":"publisher","first-page":"450","DOI":"10.2307\/2371219","volume":"57","author":"W.R. Thompson","year":"1935","unstructured":"Thompson, W.R.: On the theory of apportionment. American Journal of Mathematics\u00a057, 450\u2013456 (1935)","journal-title":"American Journal of Mathematics"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-44848-9_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,14]],"date-time":"2019-09-14T20:02:55Z","timestamp":1568491375000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-44848-9_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783662448472","9783662448489"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-44848-9_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]}}}