{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T20:04:12Z","timestamp":1778270652788,"version":"3.51.4"},"publisher-location":"Cham","reference-count":10,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319116617","type":"print"},{"value":"9783319116624","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-11662-4_11","type":"book-chapter","created":{"date-parts":[[2014,10,1]],"date-time":"2014-10-01T09:19:18Z","timestamp":1412155158000},"page":"140-154","source":"Crossref","is-referenced-by-count":8,"title":["Selecting Near-Optimal Approximate State Representations in Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Ronald","family":"Ortner","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Odalric-Ambrym","family":"Maillard","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniil","family":"Ryabko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"11_CR1","unstructured":"Bartlett, P.L., Tewari, A.: REGAL: A regularization based algorithm for reinforcement learning in weakly communicating MDPs. In: Proc.\u00a025th Conf.\u00a0on Uncertainty in Artificial Intelligence, UAI 2009, pp. 25\u201342. AUAI Press (2009)"},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Hallak, A., Castro, D.D., Mannor, S.: Model selection in Markovian processes. In: 19th ACM SIGKDD Int\u2019l Conf.\u00a0on Knowledge Discovery and Data Mining, KDD 2013, pp. 374\u2013382. ACM (2013)","DOI":"10.1145\/2487575.2487613"},{"key":"11_CR3","first-page":"1563","volume":"11","author":"T. Jaksch","year":"2010","unstructured":"Jaksch, T., Ortner, R., Auer, P.: Near-optimal regret bounds for reinforcement learning. J. Mach. Learn. Res.\u00a011, 1563\u20131600 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"11_CR4","first-page":"1555","volume":"15","author":"M. Littman","year":"2002","unstructured":"Littman, M., Sutton, R., Singh, S.: Predictive representations of state. Adv.\u00a0Neural Inf.\u00a0Process.\u00a0Syst.\u00a015, 1555\u20131561 (2002)","journal-title":"Adv.\u00a0Neural Inf.\u00a0Process.\u00a0Syst."},{"key":"11_CR5","doi-asserted-by":"publisher","first-page":"3","DOI":"10.2478\/v10229-011-0002-8","volume":"1","author":"M. Hutter","year":"2009","unstructured":"Hutter, M.: Feature Reinforcement Learning: Part I: Unstructured MDPs. J. Artificial General Intelligence\u00a01, 3\u201324 (2009)","journal-title":"J. Artificial General Intelligence"},{"key":"11_CR6","unstructured":"Maillard, O.A., Nguyen, P., Ortner, R., Ryabko, D.: Optimal regret bounds for selecting the state representation in reinforcement learning. In: Proc.\u00a030th Int\u2019l Conf.\u00a0on Machine Learning, ICML 2013. JMLR Proc., vol.\u00a028, pp. 543\u2013551 (2013)"},{"key":"11_CR7","unstructured":"Nguyen, P., Maillard, O.A., Ryabko, D., Ortner, R.: Competing with an infinite set of models in reinforcement learning. In: Proc.\u00a016th Int\u2019l Conf.\u00a0on Artificial Intelligence and Statistics, AISTATS 2013. JMLR Proc., vol.\u00a031, pp. 463\u2013471 (2013)"},{"key":"11_CR8","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1007\/978-3-540-75225-7_30","volume-title":"Algorithmic Learning Theory","author":"R. Ortner","year":"2007","unstructured":"Ortner, R.: Pseudometrics for state aggregation in average reward markov decision processes. In: Hutter, M., Servedio, R.A., Takimoto, E. (eds.) ALT 2007. LNCS (LNAI), vol.\u00a04754, pp. 373\u2013387. Springer, Heidelberg (2007)"},{"key":"11_CR9","unstructured":"Ortner, R., Maillard, O.A., Ryabko, D.: Selecting Near-Optimal Approximate State Representations in Reinforcement Learning. Extended version, \n                      \n                        http:\/\/arxiv.org\/abs\/1405.2652"},{"key":"11_CR10","first-page":"1772","volume":"25","author":"R. Ortner","year":"2012","unstructured":"Ortner, R., Ryabko, D.: Online Regret Bounds for Undiscounted Continuous Reinforcement Learning. Adv.\u00a0Neural Inf.\u00a0Process.\u00a0Syst.\u00a025, 1772\u20131780 (2012)","journal-title":"Adv.\u00a0Neural Inf.\u00a0Process.\u00a0Syst."}],"container-title":["Lecture Notes in Computer Science","Algorithmic Learning Theory"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-11662-4_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,28]],"date-time":"2019-05-28T01:35:19Z","timestamp":1559007319000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-11662-4_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319116617","9783319116624"],"references-count":10,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-11662-4_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]}}}