{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T11:47:00Z","timestamp":1725536820157},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642041792"},{"type":"electronic","value":"9783642041808"}],"license":[{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-04180-8_59","type":"book-chapter","created":{"date-parts":[[2009,8,27]],"date-time":"2009-08-27T04:11:20Z","timestamp":1251346280000},"page":"644-659","source":"Crossref","is-referenced-by-count":3,"title":["Compositional Models for Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Nicholas K.","family":"Jong","sequence":"first","affiliation":[]},{"given":"Peter","family":"Stone","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"59_CR1","first-page":"103","volume":"13","author":"A.W. Moore","year":"1993","unstructured":"Moore, A.W., Atkeson, C.G.: Prioritized sweeping: Reinforcement learning with less data and less real time. Machine Learning\u00a013, 103\u2013130 (1993)","journal-title":"Machine Learning"},{"key":"59_CR2","unstructured":"Kearns, M., Singh, S.: Near-optimal reinforcement learning in polynomial time. In: Proceedings of the Fifteenth International Conference on Machine Learning, pp. 260\u2013268 (1998)"},{"key":"59_CR3","first-page":"213","volume":"3","author":"R.I. Brafman","year":"2002","unstructured":"Brafman, R.I., Tennenholtz, M.: R-max \u2013 a general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research\u00a03, 213\u2013231 (2002)","journal-title":"Journal of Machine Learning Research"},{"key":"59_CR4","first-page":"1107","volume":"4","author":"M.G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Least-squares policy iteration. Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"59_CR5","doi-asserted-by":"crossref","unstructured":"Riedmiller, M.: Neural fitted Q iteration \u2013 first experiences with a data efficient neural reinforcement learning method. In: Proceedings of the European Conference on Machine Learning (2005)","DOI":"10.1007\/11564096_32"},{"key":"59_CR6","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"A.G. Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discrete-Event Systems\u00a013, 41\u201377 (2003); Special Issue on Reinforcement Learning","journal-title":"Discrete-Event Systems"},{"key":"59_CR7","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley & Sons, Inc., Chichester (1994)"},{"key":"59_CR8","unstructured":"Kakade, S.M.: On the Sample Complexity of Reinforcement Learning. PhD thesis, University College London (2003)"},{"key":"59_CR9","doi-asserted-by":"crossref","unstructured":"Gordon, G.J.: Stable function approximation in dynamic programming. In: Proceedings of the Twelfth International Conference on Machine Learning (1995)","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"issue":"2","key":"59_CR10","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1023\/A:1017928328829","volume":"49","author":"D. Ormoneit","year":"2002","unstructured":"Ormoneit, D., Sen, \u015a.: Kernel-based reinforcement learning. Machine Learning\u00a049(2), 161\u2013178 (2002)","journal-title":"Machine Learning"},{"key":"59_CR11","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"T.G. Dietterich","year":"2000","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the MAXQ value function decomposition. Journal of Artificial Intelligence Research\u00a013, 227\u2013303 (2000)","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"1\u20132","key":"59_CR12","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R.S. Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence\u00a0112(1\u20132), 181\u2013211 (1999)","journal-title":"Artificial Intelligence"},{"key":"59_CR13","doi-asserted-by":"crossref","unstructured":"Jong, N.K., Stone, P.: Model-based exploration in continuous state spaces. In: Proceedings of the Seventh Symposium on Abstraction, Reformulation and Approximation (2007)","DOI":"10.1007\/978-3-540-73580-9_21"},{"key":"59_CR14","doi-asserted-by":"crossref","unstructured":"Jong, N.K., Stone, P.: Hierarchical model-based reinforcement learning: R-max + MAXQ. In: Proceedings of the Twenty-Fifth International Conference on Machine Learning (2008)","DOI":"10.1145\/1390156.1390211"},{"key":"59_CR15","doi-asserted-by":"crossref","unstructured":"Beygelzimer, A., Kakade, S., Langford, J.: Cover trees for nearest neighbor. In: Proceedings of the Twenty-Third International Conference on Machine Learning (2006)","DOI":"10.1145\/1143844.1143857"},{"key":"59_CR16","unstructured":"Duff, M.: Design for an optimal probe. In: Proceedings of the Twentieth International Conference on Machine Learning, pp. 131\u2013138 (2003)"},{"key":"59_CR17","unstructured":"Ravindran, B., Barto, A.G.: SMDP homomorphisms: An algebraic approach to abstraction in semi-Markov decision processes. In: Proceedings of the Eighteenth International Joint Conference on Artificial Intelligence (2003)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-04180-8_59","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,9]],"date-time":"2019-03-09T09:50:19Z","timestamp":1552125019000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-04180-8_59"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642041792","9783642041808"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-04180-8_59","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2009]]}}}