{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:36:14Z","timestamp":1774946174898,"version":"3.50.1"},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2011,7,2]],"date-time":"2011-07-02T00:00:00Z","timestamp":1309564800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2011,12]]},"DOI":"10.1007\/s10994-011-5257-4","type":"journal-article","created":{"date-parts":[[2011,7,1]],"date-time":"2011-07-01T18:23:44Z","timestamp":1309544624000},"page":"361-391","source":"Crossref","is-referenced-by-count":15,"title":["An asymptotically optimal policy for finite support models in the multiarmed bandit problem"],"prefix":"10.1007","volume":"85","author":[{"given":"Junya","family":"Honda","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Akimichi","family":"Takemura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2011,7,2]]},"reference":[{"key":"5257_CR1","doi-asserted-by":"crossref","first-page":"1926","DOI":"10.1137\/S0363012992237273","volume":"33","author":"R. Agrawal","year":"1995","unstructured":"Agrawal, R. (1995a). The continuum-armed bandit problem. SIAM Journal on Control and Optimization, 33, 1926\u20131951.","journal-title":"SIAM Journal on Control and Optimization"},{"key":"5257_CR2","doi-asserted-by":"crossref","first-page":"1054","DOI":"10.2307\/1427934","volume":"27","author":"R. Agrawal","year":"1995","unstructured":"Agrawal, R. (1995b). Sample mean based index policies with o(log n) regret for the multi-armed bandit problem. Advances in Applied Probability, 27, 1054\u20131078.","journal-title":"Advances in Applied Probability"},{"key":"5257_CR3","volume-title":"Proceedings of COLT 2009","author":"J.-Y. Audibert","year":"2009","unstructured":"Audibert, J.-Y., & Bubeck, S. (2009). Minimax policies for adversarial and stochastic bandits. In Proceedings of COLT 2009. Montreal: Omnipress."},{"key":"5257_CR4","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., & Fischer, P. (2002a). Finite-time analysis of the multiarmed bandit problem. Machine Learning, 47, 235\u2013256.","journal-title":"Machine Learning"},{"key":"5257_CR5","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1137\/S0097539701398375","volume":"32","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Freund, Y., & Schapire, R. E. (2002b). The nonstochastic multiarmed bandit problem. SIAM Journal on Computing, 32, 48\u201377.","journal-title":"SIAM Journal on Computing"},{"key":"5257_CR6","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511804441","volume-title":"Convex optimization","author":"S. Boyd","year":"2004","unstructured":"Boyd, S., & Vandenberghe, L. (2004). Convex optimization. Cambridge: Cambridge University Press."},{"key":"5257_CR7","doi-asserted-by":"crossref","first-page":"122","DOI":"10.1006\/aama.1996.0007","volume":"17","author":"A. N. Burnetas","year":"1996","unstructured":"Burnetas, A. N., & Katehakis, M. N. (1996). Optimal adaptive policies for sequential allocation problems. Advances in Applied Mathematics, 17, 122\u2013142.","journal-title":"Advances in Applied Mathematics"},{"key":"5257_CR8","volume-title":"Elements of information theory","author":"T. M. Cover","year":"2006","unstructured":"Cover, T. M., & Thomas, J. A. (2006). Elements of information theory (2nd edn.). New York: Wiley-Interscience.","edition":"2"},{"key":"5257_CR9","first-page":"255","volume-title":"Proceedings of COLT 2002","author":"E. Even-Dar","year":"2002","unstructured":"Even-Dar, E., Mannor, S., & Mansour, Y. (2002). Pac bounds for multi-armed bandit and Markov decision processes. In Proceedings of COLT 2002 (pp. 255\u2013270). London: Springer."},{"key":"5257_CR10","volume-title":"Introduction to sensitivity and stability analysis in nonlinear programming","author":"A. V. Fiacco","year":"1983","unstructured":"Fiacco, A. V. (1983). Introduction to sensitivity and stability analysis in nonlinear programming. New York: Academic Press."},{"key":"5257_CR11","volume-title":"Multi-armed bandit allocation indices. Wiley-Interscience Series in Systems and Optimization","author":"J. C. Gittins","year":"1989","unstructured":"Gittins, J. C. (1989). Multi-armed bandit allocation indices. Wiley-Interscience Series in Systems and Optimization. Chichester: Wiley."},{"key":"5257_CR12","first-page":"67","volume-title":"Proceedings of COLT 2010","author":"J. Honda","year":"2010","unstructured":"Honda, J., & Takemura, A. (2010). An asymptotically optimal bandit algorithm for bounded support models. In Proceedings of COLT 2010, Haifa, Israel (pp. 67\u201379)."},{"key":"5257_CR13","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1007\/BF02191765","volume":"83","author":"T. Ishikida","year":"1994","unstructured":"Ishikida, T., & Varaiya, P. (1994). Multi-armed bandit problem revisited. Journal of Optimization Theory and Applications, 83, 113\u2013154.","journal-title":"Journal of Optimization Theory and Applications"},{"key":"5257_CR14","doi-asserted-by":"crossref","first-page":"262","DOI":"10.1287\/moor.12.2.262","volume":"12","author":"M. N. Katehakis","year":"1987","unstructured":"Katehakis, M. N., & Veinott, A. F. Jr. (1987). The multi-armed bandit problem: decomposition and computation. Mathematics of Operations Research, 12, 262\u2013268.","journal-title":"Mathematics of Operations Research"},{"key":"5257_CR15","first-page":"697","volume-title":"Proceedings of NIPS 2005","author":"R. Kleinberg","year":"2005","unstructured":"Kleinberg, R. (2005). Nearly tight bounds for the continuum-armed bandit problem. In Proceedings of NIPS 2005 (pp. 697\u2013704). New York: MIT Press."},{"key":"5257_CR16","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1016\/0196-8858(85)90002-8","volume":"6","author":"T. L. Lai","year":"1985","unstructured":"Lai, T. L., & Robbins, H. (1985). Asymptotically efficient adaptive allocation rules. Advances in Applied Mathematics, 6, 4\u201322.","journal-title":"Advances in Applied Mathematics"},{"key":"5257_CR17","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1023\/A:1007541107674","volume":"35","author":"N. Meuleau","year":"1999","unstructured":"Meuleau, N., & Bourgine, P. (1999). Exploration of multi-state environments: Local measures and back-propagation of uncertainty. Machine Learning, 35, 117\u2013154.","journal-title":"Machine Learning"},{"key":"5257_CR18","series-title":"Springer Series in Statistics","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4612-5254-2","volume-title":"Convergence of stochastic processes","author":"D. Pollard","year":"1984","unstructured":"Pollard, D. (1984). Convergence of stochastic processes. Springer Series in Statistics. New York: Springer."},{"key":"5257_CR19","doi-asserted-by":"crossref","first-page":"527","DOI":"10.1090\/S0002-9904-1952-09620-8","volume":"58","author":"H. Robbins","year":"1952","unstructured":"Robbins, H. (1952). Some aspects of the sequential design of experiments. Bulletin of the American Mathematical Society, 58, 527\u2013535.","journal-title":"Bulletin of the American Mathematical Society"},{"key":"5257_CR20","first-page":"943","volume-title":"Proceedings of ICML 2000","author":"M. Strens","year":"2000","unstructured":"Strens, M. (2000). A Bayesian framework for reinforcement learning. In Proceedings of ICML 2000 (pp. 943\u2013950). San Francisco: Kaufmann."},{"key":"5257_CR21","first-page":"437","volume-title":"Proceedings of ECML 2005","author":"J. Vermorel","year":"2005","unstructured":"Vermorel, J., & Mohri, M. (2005). Multi-armed bandit algorithms and empirical evaluation. In Proceedings of ECML 2005, Porto, Portugal (pp. 437\u2013448). Berlin: Springer."},{"key":"5257_CR22","unstructured":"Wyatt, J. (1997). Exploration and inference in learning from reinforcement. Doctoral dissertation, Department of Artificial Intelligence, University of Edinburgh."},{"key":"5257_CR23","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1007\/BF02055587","volume":"28","author":"S. Yakowitz","year":"1991","unstructured":"Yakowitz, S., & Lowe, W. (1991). Nonparametric bandit methods. Annals of Operation Research, 28, 297\u2013312.","journal-title":"Annals of Operation Research"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-011-5257-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-011-5257-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-011-5257-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T01:40:31Z","timestamp":1559353231000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-011-5257-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,7,2]]},"references-count":23,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2011,12]]}},"alternative-id":["5257"],"URL":"https:\/\/doi.org\/10.1007\/s10994-011-5257-4","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,7,2]]}}}