{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T07:31:53Z","timestamp":1725521513197},"publisher-location":"Berlin, Heidelberg","reference-count":14,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540897217"},{"type":"electronic","value":"9783540897224"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-89722-4_4","type":"book-chapter","created":{"date-parts":[[2008,11,26]],"date-time":"2008-11-26T13:57:14Z","timestamp":1227707834000},"page":"41-54","source":"Crossref","is-referenced-by-count":1,"title":["Efficient Reinforcement Learning in Parameterized Models: Discrete Parameter Case"],"prefix":"10.1007","author":[{"given":"Kirill","family":"Dyagilev","sequence":"first","affiliation":[]},{"given":"Shie","family":"Mannor","sequence":"additional","affiliation":[]},{"given":"Nahum","family":"Shimkin","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"4_CR1","first-page":"213","volume":"3","author":"R.I. Brafman","year":"2002","unstructured":"Brafman, R.I., Tennenholtz, M.: R-max - a general polynomial time algorithm for near-optimal reinforcement learning. JMLR\u00a03, 213\u2013231 (2002)","journal-title":"JMLR"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Dyagilev, K., Mannor, S., Shimkin, N.: Efficient reinforcement learning in parameterized models. Technical report, Technion (2008), http:\/\/www.ee.technion.ac.il\/people\/shimkin\/PREPRINTS\/PEL_full.pdf","DOI":"10.4108\/icst.valuetools.2008.56"},{"issue":"1","key":"4_CR3","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1109\/TCOM.1967.1089532","volume":"com-15","author":"T. Kailath","year":"1967","unstructured":"Kailath, T.: The divergence and bhattacharyya distance measures in signal selection. IEEE Transactions of Communication Technology\u00a0com-15(1), 52\u201360 (1967)","journal-title":"IEEE Transactions of Communication Technology"},{"key":"4_CR4","unstructured":"Kakade, S.M.: On the Sample Complexity of Reinforcement Learning. Ph.D thesis, University College London (2003)"},{"key":"4_CR5","unstructured":"Kearns, M.J., Koller, D.: Efficient reinforcement learning in factored MDPs. In: IJCAI, pp. 740\u2013747 (1999)"},{"key":"4_CR6","first-page":"209","volume":"49","author":"M.J. Kearns","year":"2002","unstructured":"Kearns, M.J., Singh, S.P.: Near-optimal reinforcement learning in polynomial time. JMLR\u00a049, 209\u2013232 (2002)","journal-title":"JMLR"},{"key":"4_CR7","volume-title":"Stochastic Systems: Estimation, Identification and Adaptive Control","author":"P.R. Kumar","year":"1998","unstructured":"Kumar, P.R., Varaiya, P.: Stochastic Systems: Estimation, Identification and Adaptive Control. The MIT Press, Cambridge (1998)"},{"key":"4_CR8","first-page":"623","volume":"5","author":"S. Mannor","year":"2004","unstructured":"Mannor, S., Tsitsiklis, J.N.: The sample complexity of exploration in the multi-armed bandit problem. JMLR\u00a05, 623\u2013648 (2004)","journal-title":"JMLR"},{"key":"4_CR9","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes. Discrete Stochastic Programming","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes. Discrete Stochastic Programming. Wiley, Chichester (1994)"},{"key":"4_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"334","DOI":"10.1007\/11894841_27","volume-title":"Algorithmic Learning Theory","author":"D. Ryabko","year":"2006","unstructured":"Ryabko, D., Hutter, M.: Asymptotic learnability of reinforcement problems with arbitrary dependence. In: Balc\u00e1zar, J.L., Long, P.M., Stephan, F. (eds.) ALT 2006. LNCS, vol.\u00a04264, pp. 334\u2013347. Springer, Heidelberg (2006)"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Strehl, A.L., Li, L., Wiewiora, E., Langford, J., Littman, M.L.: $\\text{PAC}$ model-free reinforcement learning. In: Proceedings of the ICML 2006 (2006)","DOI":"10.1145\/1143844.1143955"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Strehl, A.L., Littman, M.L.: A theoretical analysis of model-based interval estimation. In: Proceedings of ICML 2005, pp. 857\u2013864 (2005)","DOI":"10.1145\/1102351.1102459"},{"key":"4_CR13","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. The MIT Press, Cambridge (1998)"},{"key":"4_CR14","volume-title":"Sequential Analysis","author":"A. Wald","year":"1952","unstructured":"Wald, A.: Sequential Analysis. Wiley, Chichester (1952)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-89722-4_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,15]],"date-time":"2019-05-15T18:59:06Z","timestamp":1557946746000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-89722-4_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540897217","9783540897224"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-89722-4_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]}}}