{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T20:28:58Z","timestamp":1773779338232,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":13,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540926948","type":"print"},{"value":"9783540926955","type":"electronic"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-92695-5_5","type":"book-chapter","created":{"date-parts":[[2008,12,16]],"date-time":"2008-12-16T10:11:59Z","timestamp":1229422319000},"page":"56-68","source":"Crossref","is-referenced-by-count":14,"title":["Improving the Exploration Strategy in Bandit Algorithms"],"prefix":"10.1007","author":[{"given":"Olivier","family":"Caelen","sequence":"first","affiliation":[]},{"given":"Gianluca","family":"Bontempi","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"2\/3","key":"5_CR1","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Machine Learning\u00a047(2\/3), 235\u2013256 (2002)","journal-title":"Machine Learning"},{"issue":"1","key":"5_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0167-9236(03)00061-7","volume":"38","author":"R. Azoulay-Schwartz","year":"2004","unstructured":"Azoulay-Schwartz, R., Kraus, S., Wilkenfeld, J.: Exploitation vs. exploration: choosing a supplier in an environment of incomplete information. Decision support systems\u00a038(1), 1\u201318 (2004)","journal-title":"Decision support systems"},{"key":"5_CR3","volume-title":"Dynamic Programming - Deterministic and Stochastic Models","author":"D.P. Bertsekas","year":"1987","unstructured":"Bertsekas, D.P.: Dynamic Programming - Deterministic and Stochastic Models. Prentice-Hall, Englewood Cliffs (1987)"},{"key":"5_CR4","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1080\/10618600.1992.10477010","volume":"1","author":"A. Genz","year":"1992","unstructured":"Genz, A.: Numerical computation of multivariate normal probabilities. Journal of Computational and Graphical Statistics\u00a0(1), 141\u2013149 (1992)","journal-title":"Journal of Computational and Graphical Statistics"},{"key":"5_CR5","volume-title":"Multi-armed Bandit Allocation Indices","author":"J.C. Gittins","year":"1989","unstructured":"Gittins, J.C.: Multi-armed Bandit Allocation Indices. Wiley, Chichester (1989)"},{"key":"5_CR6","first-page":"421","volume":"23","author":"J. Hardwick","year":"1991","unstructured":"Hardwick, J., Stout, Q.: Bandit strategies for ethical sequential allocation. Computing Science and Statistics\u00a023, 421\u2013424 (1991)","journal-title":"Computing Science and Statistics"},{"key":"5_CR7","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L.P. Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.P.: Reinforcement learning: A survey. Journal of Artificial Intelligence Research\u00a04, 237\u2013285 (1996)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"5_CR8","volume-title":"Handbooks in Operations Research and Management Science","author":"S. Kim","year":"2006","unstructured":"Kim, S., Nelson, B.: Selecting the Best System. In: Handbooks in Operations Research and Management Science. Elsevier Science, Amsterdam (2006)"},{"key":"5_CR9","unstructured":"Kim, S.-H., Nelson, B.L.: Selecting the best system: theory and methods. In: WSC 2003: Proceedings of the 35th conference on Winter simulation, pp. 101\u2013112 (2003)"},{"key":"5_CR10","unstructured":"Schneider, J., Moore, A.: Active learning in discrete input spaces. In: Proceedings of the 34th Interface Symposium (2002)"},{"key":"5_CR11","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"5_CR12","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-9655-0","volume-title":"The Multivariate Normal Distribution","author":"Y.L. Tong","year":"1990","unstructured":"Tong, Y.L.: The Multivariate Normal Distribution. Springer, Heidelberg (1990)"},{"key":"5_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/11564096_42","volume-title":"Machine Learning: ECML 2005","author":"J. Vermorel","year":"2005","unstructured":"Vermorel, J., Mohri, M.: Multi-armed bandit algorithms and empirical evaluation. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS, vol.\u00a03720, pp. 437\u2013448. Springer, Heidelberg (2005)"}],"container-title":["Lecture Notes in Computer Science","Learning and Intelligent Optimization"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-92695-5_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T05:56:06Z","timestamp":1738821366000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-92695-5_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540926948","9783540926955"],"references-count":13,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-92695-5_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2008]]}}}