{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T06:41:21Z","timestamp":1725518481366},"publisher-location":"Berlin, Heidelberg","reference-count":18,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540875352"},{"type":"electronic","value":"9783540875369"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-87536-9_37","type":"book-chapter","created":{"date-parts":[[2008,9,5]],"date-time":"2008-09-05T11:23:30Z","timestamp":1220613810000},"page":"357-366","source":"Crossref","is-referenced-by-count":9,"title":["Multigrid Reinforcement Learning with Reward Shaping"],"prefix":"10.1007","author":[{"given":"Marek","family":"Grze\u015b","sequence":"first","affiliation":[]},{"given":"Daniel","family":"Kudenko","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"37_CR1","unstructured":"Ng, A.Y., Harada, D., Russell, S.J.: Policy invariance under reward transformations: Theory and application to reward shaping. In: Proceedings of the 16th International Conference on Machine Learning, pp. 278\u2013287 (1999)"},{"key":"37_CR2","unstructured":"Randlov, J., Alstrom, P.: Learning to drive a bicycle using reinforcement learning and shaping. In: Proceedings of the 15th International Conference on Machine Learning, pp. 463\u2013471 (1998)"},{"key":"37_CR3","doi-asserted-by":"crossref","unstructured":"Marthi, B.: Automatic shaping and decomposition of reward functions. In: Proceedings of the 24th International Conference on Machine Learning, pp. 601\u2013608 (2007)","DOI":"10.1145\/1273496.1273572"},{"key":"37_CR4","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"37_CR5","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.575","volume":"11","author":"C. Boutilier","year":"1999","unstructured":"Boutilier, C., Dean, T., Hanks, S.: Decision-theoretic planning: Structural assumptions and computational leverage. Journal of Artificial Intelligence Research\u00a011, 1\u201394 (1999)","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"8","key":"37_CR6","doi-asserted-by":"publisher","first-page":"898","DOI":"10.1109\/9.133184","volume":"36","author":"C.S. Chow","year":"1991","unstructured":"Chow, C.S., Tsitsiklis, J.N.: An optimal one-way multigrid algorithm for discrete-time stochastic control. IEEE Transactions on Automatic Control\u00a036(8), 898\u2013914 (1991)","journal-title":"IEEE Transactions on Automatic Control"},{"key":"37_CR7","unstructured":"Anderson, C., Crawford-Hines, S.: Multigrid Q-learning. Technical Report CS-94-121, Colorado State University (1994)"},{"issue":"1-2","key":"37_CR8","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R.S. Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.P.: Between MDPs and Semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence\u00a0112(1-2), 181\u2013211 (1999)","journal-title":"Artificial Intelligence"},{"key":"37_CR9","first-page":"851","volume":"6","author":"D. Wingate","year":"2005","unstructured":"Wingate, D., Seppi, K.D.: Prioritization methods for accelerating MDP solvers. Journal of Machine Learning Research\u00a06, 851\u2013881 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"37_CR10","doi-asserted-by":"crossref","unstructured":"Epshteyn, A., DeJong, G.: Qualitative reinforcement learning. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 305\u2013312 (2006)","DOI":"10.1145\/1143844.1143883"},{"issue":"2-3","key":"37_CR11","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1023\/A:1017992615625","volume":"49","author":"R. Munos","year":"2002","unstructured":"Munos, R., Moore, A.: Variable resolution discretization in optimal control. Machine Learning\u00a049(2-3), 291\u2013323 (2002)","journal-title":"Machine Learning"},{"key":"37_CR12","doi-asserted-by":"crossref","unstructured":"Stone, P., Veloso, M.: Layered learning. In: Proceedings of the 11th European Conference on Machine Learning (2000)","DOI":"10.1007\/3-540-45164-1_38"},{"key":"37_CR13","doi-asserted-by":"crossref","unstructured":"Kaelbling, L.P.: Hierarchical learning in stochastic domains: Preliminary results. In: Proceedings of International Conference on Machine Learning, pp. 167\u2013173 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50028-9"},{"key":"37_CR14","unstructured":"Moore, A., Baird, L., Kaelbling, L.P.: Multi-value-functions: Efficient automatic action hierarchies for multiple goal MDPs. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 1316\u20131323 (1999)"},{"key":"37_CR15","unstructured":"Dayan, P., Hinton, G.E.: Feudal reinforcement learning. In: Proceedings of Advances in Neural Information Processing Systems (1993)"},{"key":"37_CR16","unstructured":"Parr, R., Russell, S.: Reinforcement learning with hierarchies of machines. In: Proccedings of Advances in Neural Information Processing Systems, vol.\u00a010 (1997)"},{"key":"37_CR17","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"T.G. Dietterich","year":"2000","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the MAXQ value function decomposition. Journal of Artificial Intelligence Research\u00a013, 227\u2013303 (2000)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"37_CR18","doi-asserted-by":"crossref","unstructured":"Taylor, M.E., Stone, P.: Behavior transfer for value-function-based reinforcement learning. In: Proceedings of the 4th International Joint Conference on Autonomous Agents and Multiagent Systems, pp. 53\u201359 (2005)","DOI":"10.1145\/1082473.1082482"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks - ICANN 2008"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-87536-9_37.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,23]],"date-time":"2020-11-23T21:38:35Z","timestamp":1606167515000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-87536-9_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540875352","9783540875369"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-87536-9_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[]}}