{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T08:27:30Z","timestamp":1758270450875},"publisher-location":"Berlin, Heidelberg","reference-count":34,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642409349"},{"type":"electronic","value":"9783642409356"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-40935-6_16","type":"book-chapter","created":{"date-parts":[[2013,9,27]],"date-time":"2013-09-27T05:14:50Z","timestamp":1380258890000},"page":"218-233","source":"Crossref","is-referenced-by-count":17,"title":["Robust Risk-Averse Stochastic Multi-armed Bandits"],"prefix":"10.1007","author":[{"given":"Odalric-Ambrym","family":"Maillard","sequence":"first","affiliation":[]}],"member":"297","reference":[{"issue":"3","key":"16_CR1","doi-asserted-by":"publisher","first-page":"1105","DOI":"10.1007\/s10957-011-9968-2","volume":"155","author":"A. Ahmadi-Javid","year":"2012","unstructured":"Ahmadi-Javid, A.: Entropic value-at-risk: A new coherent risk measure. Journal of Optimization Theory and Applications\u00a0155(3), 1105\u20131123 (2012)","journal-title":"Journal of Optimization Theory and Applications"},{"issue":"1","key":"16_CR2","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/s10479-006-0132-6","volume":"152","author":"P. Artzner","year":"2007","unstructured":"Artzner, P., Delbaen, F., Eber, J.-M., Heath, D., Ku, H.: Coherent multiperiod risk adjusted values and bellman\u2019s principle. Annals of Operations Research\u00a0152(1), 5\u201322 (2007)","journal-title":"Annals of Operations Research"},{"issue":"19","key":"16_CR3","doi-asserted-by":"publisher","first-page":"1876","DOI":"10.1016\/j.tcs.2009.01.016","volume":"410","author":"J.-Y. Audibert","year":"2009","unstructured":"Audibert, J.-Y., Munos, R., Szepesv\u00e1ri, C.: Exploration-exploitation trade-off using variance estimates in multi-armed bandits. Theoretical Computer Science\u00a0410(19), 1876\u20131902 (2009)","journal-title":"Theoretical Computer Science"},{"issue":"2-3","key":"16_CR4","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Machine Learning\u00a047(2-3), 235\u2013256 (2002)","journal-title":"Machine Learning"},{"key":"16_CR5","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1137\/S0097539701398375","volume":"32","author":"P. Auer","year":"2003","unstructured":"Auer, P., Cesa-Bianchi, N., Freund, Y., Schapire, R.E.: The nonstochastic multiarmed bandit problem. SIAM Journal on Computing\u00a032, 48\u201377 (2003)","journal-title":"SIAM Journal on Computing"},{"issue":"2","key":"16_CR6","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1137\/0329017","volume":"29","author":"J. Borwein","year":"1991","unstructured":"Borwein, J., Lewis, A.: Duality relationships for entropy-like minimization problem. SIAM Journal on Computation and Optimization\u00a029(2), 325\u2013338 (1991)","journal-title":"SIAM Journal on Computation and Optimization"},{"issue":"2","key":"16_CR7","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1006\/aama.1996.0007","volume":"17","author":"A. Burnetas","year":"1996","unstructured":"Burnetas, A., Katehakis, M.: Optimal adaptive policies for sequential allocation problems. Advances in Applied Mathematics\u00a017(2), 122\u2013142 (1996)","journal-title":"Advances in Applied Mathematics"},{"key":"16_CR8","doi-asserted-by":"crossref","unstructured":"Capp\u00e9, O., Garivier, A., Maillard, O.-A., Munos, R., Stoltz, G.: Kullback-leibler upper confidence bounds for optimal sequential allocation. The Annals of Statistics (2013)","DOI":"10.1214\/13-AOS1119"},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"Cover, T., Thomas, J.: Elements of Information Theory. John Wiley (1991)","DOI":"10.1002\/0471200611"},{"key":"16_CR10","unstructured":"Defourny, B., Ernst, D., Wehenkel, L.: Risk-aware decision making and dynamic programming. In: NIPS Workshop on Model Uncertainty and Risk in RL (2008)"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Dembo, A., Zeitouni, O.: Large Deviations Techniques and Applications, 2nd edn. Springer (1998)","DOI":"10.1007\/978-1-4612-5320-4"},{"key":"16_CR12","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1007\/BF01582110","volume":"16","author":"E. Denardo","year":"1979","unstructured":"Denardo, E., Rothblum, U.: Optimal stopping, exponential utility and linear programming. Mathematical Programming\u00a016, 228\u2013244 (1979)","journal-title":"Mathematical Programming"},{"key":"16_CR13","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1007\/11894841_18","volume-title":"Algorithmic Learning Theory","author":"E. Even-Dar","year":"2006","unstructured":"Even-Dar, E., Kearns, M., Wortman, J.: Risk-sensitive online learning. In: Balc\u00e1zar, J.L., Long, P.M., Stephan, F. (eds.) ALT 2006. LNCS (LNAI), vol.\u00a04264, pp. 199\u2013213. Springer, Heidelberg (2006)"},{"key":"16_CR14","unstructured":"Garivier, A., Capp\u00e9, O.: The KL-UCB algorithm for bounded stochastic bandits and beyond. In: Proceedings of the 24th Annual Conference on Learning Theory (2011)"},{"key":"16_CR15","unstructured":"Harari-Kermadec, H.: Vraisemblance empirique g\u00e9n\u00e9ralis\u00e9e et estimation semi-param\u00e9trique. PhD thesis, Universit\u00e9 Paris\u2013Ouest (December 2006)"},{"key":"16_CR16","unstructured":"Honda, J., Takemura, A.: An asymptotically optimal bandit algorithm for bounded support models. In: Proceedings of the 23rd Annual Conference on Learning Theory, Haifa, Israel (2010)"},{"key":"16_CR17","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1007\/s10994-011-5257-4","volume":"85","author":"J. Honda","year":"2011","unstructured":"Honda, J., Takemura, A.: An asymptotically optimal policy for finite support models in the multiarmed bandit problem. Machine Learning\u00a085, 361\u2013391 (2011)","journal-title":"Machine Learning"},{"key":"16_CR18","unstructured":"Honda, J., Takemura, A.: Finite-time regret bound of a bandit algorithm for the semi-bounded support model. arXiv:1202.2277 (2012)"},{"key":"16_CR19","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1287\/mnsc.18.7.356","volume":"18","author":"R.A. Howard","year":"1972","unstructured":"Howard, R.A., Matheson, J.E.: Risk-sensitive markov decision processes. Management Science\u00a018, 356\u2013369 (1972)","journal-title":"Management Science"},{"key":"16_CR20","series-title":"LNAI","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1007\/978-3-642-34106-9_18","volume-title":"Algorithmic Learning Theory","author":"E. Kaufmann","year":"2012","unstructured":"Kaufmann, E., Korda, N., Munos, R.: Thompson sampling: An asymptotically optimal finite-time analysis. In: Bshouty, N.H., Stoltz, G., Vayatis, N., Zeugmann, T. (eds.) ALT 2012. LNCS (LNAI), vol.\u00a07568, pp. 199\u2013213. Springer, Heidelberg (2012)"},{"key":"16_CR21","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/0196-8858(85)90002-8","volume":"6","author":"T.L. Lai","year":"1985","unstructured":"Lai, T.L., Robbins, H.: Asymptotically efficient adaptive allocation rules. Advances in Applied Mathematics\u00a06, 4\u201322 (1985)","journal-title":"Advances in Applied Mathematics"},{"key":"16_CR22","unstructured":"Liu, Y., Koenig, S.: An exact algorithm for solving mdps under risk-sensitive planning objectives with one-switch utility functions. In: Proceedings of the 7th International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS 2008, Richland, SC, vol.\u00a01, pp. 453\u2013460. International Foundation for Autonomous Agents and Multiagent Systems (2008)"},{"key":"16_CR23","unstructured":"Maillard, O.-A.: Robust risk-averse stochastic multi-armed bandits. Technical Report HAL-INRIA open archive (2013), \n                      \n                        http:\/\/hal.inria.fr\/hal-00821670"},{"key":"16_CR24","unstructured":"Maillard, O.-A., Munos, R., Stoltz, G.: A finite-time analysis of multi-armed bandits problems with Kullback-Leibler divergences. In: Proceedings of the 23rd Annual Conference on Learning Theory, Budapest, Hungary (2011)"},{"issue":"1","key":"16_CR25","first-page":"77","volume":"7","author":"H. Markowitz","year":"1952","unstructured":"Markowitz, H.: Portfolio selection. The Journal of Finance\u00a07(1), 77\u201391 (1952)","journal-title":"The Journal of Finance"},{"issue":"9","key":"16_CR26","doi-asserted-by":"publisher","first-page":"1379","DOI":"10.1016\/S0005-1098(01)00084-X","volume":"37","author":"S.D. Patek","year":"2001","unstructured":"Patek, S.D.: On terminating markov decision processes with a risk-averse objective function. Automatica\u00a037(9), 1379\u20131386 (2001)","journal-title":"Automatica"},{"key":"16_CR27","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1090\/S0002-9904-1952-09620-8","volume":"58","author":"H. Robbins","year":"1952","unstructured":"Robbins, H.: Some aspects of the sequential design of experiments. Bulletin of the American Mathematics Society\u00a058, 527\u2013535 (1952)","journal-title":"Bulletin of the American Mathematics Society"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"Rockafellar, R.T.: Coherent approaches to risk in optimization under uncertainty. Tutorials in Operation Research, 38\u201361 (2007)","DOI":"10.1287\/educ.1073.0032"},{"key":"16_CR29","unstructured":"Salomon, A., Audibert, J.-Y.: Robustness of Anytime Bandit Policies (2011), \n                      \n                        http:\/\/hal.archives-ouvertes.fr\/hal-00579607"},{"key":"16_CR30","unstructured":"Sani, A., Lazaric, A., Munos, R.: Risk-aversion in multi-armed bandits. In: Proceedings of Advancezs in Neural Information Processing System (2012)"},{"key":"16_CR31","doi-asserted-by":"crossref","first-page":"285","DOI":"10.1093\/biomet\/25.3-4.285","volume":"25","author":"W. Thompson","year":"1933","unstructured":"Thompson, W.: On the likelihood that one unknown probability exceeds another in view of the evidence of two samples. Biometrika\u00a025, 285\u2013294 (1933)","journal-title":"Biometrika"},{"key":"16_CR32","doi-asserted-by":"publisher","first-page":"450","DOI":"10.2307\/2371219","volume":"57","author":"W. Thompson","year":"1935","unstructured":"Thompson, W.: On the theory of apportionment. American Journal of Mathematics\u00a057, 450\u2013456 (1935)","journal-title":"American Journal of Mathematics"},{"key":"16_CR33","unstructured":"von Neumann, J., Morgenstern, O.: Theory of Games and Economic Behavior, Princeton Classic Editions. Princeton University Press (1947)"},{"key":"16_CR34","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"514","DOI":"10.1007\/11776420_38","volume-title":"Learning Theory","author":"M.K. Warmuth","year":"2006","unstructured":"Warmuth, M.K., Kuzmin, D.: Online variance minimization. In: Lugosi, G., Simon, H.U. (eds.) COLT 2006. LNCS (LNAI), vol.\u00a04005, pp. 514\u2013528. Springer, Heidelberg (2006)"}],"container-title":["Lecture Notes in Computer Science","Algorithmic Learning Theory"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-40935-6_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,17]],"date-time":"2019-05-17T15:05:47Z","timestamp":1558105547000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-40935-6_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642409349","9783642409356"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-40935-6_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2013]]}}}