{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T14:39:06Z","timestamp":1773931146640,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642341052","type":"print"},{"value":"9783642341069","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-34106-9_19","type":"book-chapter","created":{"date-parts":[[2012,10,1]],"date-time":"2012-10-01T05:56:27Z","timestamp":1349070987000},"page":"214-228","source":"Crossref","is-referenced-by-count":24,"title":["Regret Bounds for Restless Markov Bandits"],"prefix":"10.1007","author":[{"given":"Ronald","family":"Ortner","sequence":"first","affiliation":[]},{"given":"Daniil","family":"Ryabko","sequence":"additional","affiliation":[]},{"given":"Peter","family":"Auer","sequence":"additional","affiliation":[]},{"given":"R\u00e9mi","family":"Munos","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"19_CR1","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/0196-8858(85)90002-8","volume":"6","author":"T.L. Lai","year":"1985","unstructured":"Lai, T.L., Robbins, H.: Asymptotically efficient adaptive allocation rules. Adv. in Appl. Math.\u00a06, 4\u201322 (1985)","journal-title":"Adv. in Appl. Math."},{"issue":"4","key":"19_CR2","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1109\/MCOM.2008.4481339","volume":"46","author":"I.F. Akyildiz","year":"2008","unstructured":"Akyildiz, I.F., Lee, W.Y., Vuran, M.C., Mohanty, S.: A survey on spectrum management in cognitive radio networks. IEEE Commun. Mag.\u00a046(4), 40\u201348 (2008)","journal-title":"IEEE Commun. Mag."},{"issue":"11","key":"19_CR3","doi-asserted-by":"publisher","first-page":"977","DOI":"10.1109\/TAC.1987.1104485","volume":"32","author":"V. Anantharam","year":"1987","unstructured":"Anantharam, V., Varaiya, P., Walrand, J.: Asymptotically efficient allocation rules for the multiarmed bandit problem with multiple plays, part II: Markovian rewards. IEEE Trans. Automat. Control\u00a032(11), 977\u2013982 (1987)","journal-title":"IEEE Trans. Automat. Control"},{"key":"19_CR4","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1137\/S0097539701398375","volume":"32","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Freund, Y., Schapire, R.E.: The nonstochastic multiarmed bandit problem. SIAM J. Comput.\u00a032, 48\u201377 (2002)","journal-title":"SIAM J. Comput."},{"key":"19_CR5","unstructured":"Audibert, J.-Y., Bubeck, S.: Minimax policies for adversarial and stochastic bandits. In: COLT 2009. Proc. 22nd Annual Conf. on Learning Theory, pp. 217\u2013226 (2009)"},{"key":"19_CR6","first-page":"1563","volume":"11","author":"T. Jaksch","year":"2010","unstructured":"Jaksch, T., Ortner, R., Auer, P.: Near-optimal regret bounds for reinforcement learning. J. Mach. Learn. Res.\u00a011, 1563\u20131600 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"19_CR7","unstructured":"Bartlett, P.L., Tewari, A.: REGAL: A regularization based algorithm for reinforcement learning in weakly communicating MDPs. In: Proc. 25th Conference on Uncertainty in Artificial Intelligence, UAI 2009, pp. 35\u201342. AUAI Press (2009)"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Tekin, C., Liu, M.: Adaptive learning of uncontrolled restless bandits with logarithmic regret. In: 49th Annual Allerton Conference, pp. 983\u2013990. IEEE (2011)","DOI":"10.1109\/Allerton.2011.6120273"},{"issue":"1","key":"19_CR9","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1109\/JSTSP.2010.2058091","volume":"5","author":"S. Filippi","year":"2011","unstructured":"Filippi, S., Cappe, O., Garivier, A.: Optimally sensing a single channel without prior information: The tiling algorithm and regret bounds. IEEE J. Sel. Topics Signal Process.\u00a05(1), 68\u201376 (2011)","journal-title":"IEEE J. Sel. Topics Signal Process."},{"key":"19_CR10","unstructured":"Levin, D.A., Peres, Y., Wilmer, E.L.: Markov chains and mixing times. American Mathematical Society (2006)"},{"issue":"2","key":"19_CR11","doi-asserted-by":"crossref","first-page":"148","DOI":"10.1111\/j.2517-6161.1979.tb01068.x","volume":"41","author":"J.C. Gittins","year":"1979","unstructured":"Gittins, J.C.: Bandit processes and dynamic allocation indices. J. R. Stat. Soc. Ser. B Stat. Methodol.\u00a041(2), 148\u2013177 (1979)","journal-title":"J. R. Stat. Soc. Ser. B Stat. Methodol."},{"key":"19_CR12","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multi-armed bandit problem. Mach. Learn.\u00a047, 235\u2013256 (2002)","journal-title":"Mach. Learn."},{"key":"19_CR13","doi-asserted-by":"publisher","first-page":"287","DOI":"10.2307\/3214163","volume":"25","author":"P. Whittle","year":"1988","unstructured":"Whittle, P.: Restless bandits: Activity allocation in a changing world. J. Appl. Probab.\u00a025, 287\u2013298 (1988)","journal-title":"J. Appl. Probab."},{"key":"19_CR14","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1007\/978-3-540-75225-7_30","volume-title":"Algorithmic Learning Theory","author":"R. Ortner","year":"2007","unstructured":"Ortner, R.: Pseudometrics for State Aggregation in Average Reward Markov Decision Processes. In: Hutter, M., Servedio, R.A., Takimoto, E. (eds.) ALT 2007. LNCS (LNAI), vol.\u00a04754, pp. 373\u2013387. Springer, Heidelberg (2007)"},{"key":"19_CR15","unstructured":"Aldous, D.J., Fill, J.: Reversible Markov Chains and Random Walks on Graphs (in preparation), http:\/\/www.stat.berkeley.edu\/~aldous\/RWG\/book.html"},{"key":"19_CR16","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/BF01047002","volume":"4","author":"D.J. Aldous","year":"1991","unstructured":"Aldous, D.J.: Threshold limits for cover times. J. Theoret. Probab.\u00a04, 197\u2013211 (1991)","journal-title":"J. Theoret. Probab."}],"container-title":["Lecture Notes in Computer Science","Algorithmic Learning Theory"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-34106-9_19.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,29]],"date-time":"2024-04-29T22:48:50Z","timestamp":1714430930000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-34106-9_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642341052","9783642341069"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-34106-9_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}