{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T18:06:42Z","timestamp":1725732402729},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642387081"},{"type":"electronic","value":"9783642387098"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-40988-2_16","type":"book-chapter","created":{"date-parts":[[2013,8,28]],"date-time":"2013-08-28T10:56:40Z","timestamp":1377687400000},"page":"241-256","source":"Crossref","is-referenced-by-count":1,"title":["Greedy Confidence Pursuit: A Pragmatic Approach to Multi-bandit Optimization"],"prefix":"10.1007","author":[{"given":"Philip","family":"Bachman","sequence":"first","affiliation":[]},{"given":"Doina","family":"Precup","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"16_CR1","unstructured":"Agrawal, S., Goyal, N.: Analysis of thompson sampling for the multi-armed bandit problem. In: COLT (2012)"},{"key":"16_CR2","unstructured":"Audibert, J.-Y., Bubeck, S., Munos, R.: Best arm identification in multi-armed bandits. In: COLT (2010)"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Berry, D.A., Fristedt, B.: Bandit Problems. Chapman and Hall Ltd. (1985)","DOI":"10.1007\/978-94-015-3711-7"},{"key":"16_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1007\/978-3-642-04414-4_7","volume-title":"Algorithmic Learning Theory","author":"S. Bubeck","year":"2009","unstructured":"Bubeck, S., Munos, R., Stoltz, G.: Pure exploration in multi-armed bandits problems. In: Gavald\u00e0, R., Lugosi, G., Zeugmann, T., Zilles, S. (eds.) ALT 2009. LNCS, vol.\u00a05809, pp. 23\u201337. Springer, Heidelberg (2009)"},{"key":"16_CR5","unstructured":"Chappelle, O., Li, L.: An empirical evaluation of thompson sampling. In: Advances in Neural Information Processing Systems (2011)"},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Deng, K., Pineau, J., Murphy, S.: Active learning for personalizing treatment. In: IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (2011)","DOI":"10.1109\/ADPRL.2011.5967348"},{"key":"16_CR7","first-page":"1079","volume":"7","author":"E. Even-Dar","year":"2006","unstructured":"Even-Dar, E., Mannor, S., Mansour, Y.: Action elimination and stopping conditions for the multi-armed bandit and reinforcement learning problems. Journal of Machine Learning Research\u00a07, 1079\u20131105 (2006)","journal-title":"Journal of Machine Learning Research"},{"key":"16_CR8","unstructured":"Gabillon, V., Ghavamzadeh, M., Lazaric, A., Bubeck, S.: Multi-bandit best arm identification. In: Advances in Neural Information Processing Systems (2011)"},{"key":"16_CR9","unstructured":"Kalyanakrishnan, S., Stone, P.: Efficient selection of multiple bandit arms: Theory and practice. In: International Conference on Machine Learning (2010)"},{"key":"16_CR10","unstructured":"Kalyanakrishnan, S., Tewari, A., Auer, P., Stone, P.: Pac subset selection in stochastic multi-armed bandits. In: International Conference on Machine Learning (2012)"},{"key":"16_CR11","unstructured":"Li, L., Chappelle, O.: Open problem: Regret bounds for thompson sampling. In: COLT (2012)"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Madani, O., Lizotte, D.J., Greiner, R.: The budgeted multi-armed bandit problem. In: COLT (2004)","DOI":"10.1007\/978-3-540-27819-1_46"},{"key":"16_CR13","first-page":"623","volume":"5","author":"S. Mannor","year":"2004","unstructured":"Mannor, S., Tsitsiklis, J.N.: The sample complexity of exploration in the multi-armed bandit problem. Journal of Machine Learning Research\u00a05, 623\u2013648 (2004)","journal-title":"Journal of Machine Learning Research"},{"key":"16_CR14","unstructured":"Russo, D., Van Roy, B.: Learning to optimize via posterior sampling. arXiv:1301.2609v1 [cs.LG] (2013)"},{"key":"16_CR15","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1002\/asmb.874","volume":"26","author":"S.L. Scott","year":"2010","unstructured":"Scott, S.L.: A modern bayesian look at the multi-armed bandit. Applied Stochastic Models in Business and Industry\u00a026, 639\u2013658 (2010)","journal-title":"Applied Stochastic Models in Business and Industry"},{"issue":"3-4","key":"16_CR16","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1093\/biomet\/25.3-4.285","volume":"25","author":"W.R. Thompson","year":"1933","unstructured":"Thompson, W.R.: On the likelihood that one unknown probability exceeds another in view of the evidence of two samples. Biometrika\u00a025(3-4), 285\u2013294 (1933)","journal-title":"Biometrika"}],"container-title":["Lecture Notes in Computer Science","Advanced Information Systems Engineering"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-40988-2_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,16]],"date-time":"2019-05-16T23:46:34Z","timestamp":1558050394000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-40988-2_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642387081","9783642387098"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-40988-2_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2013]]}}}