{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T17:48:14Z","timestamp":1725558494750},"publisher-location":"Berlin, Heidelberg","reference-count":12,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540201212"},{"type":"electronic","value":"9783540398578"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2003]]},"DOI":"10.1007\/978-3-540-39857-8_16","type":"book-chapter","created":{"date-parts":[[2010,6,28]],"date-time":"2010-06-28T04:07:15Z","timestamp":1277698035000},"page":"157-168","source":"Crossref","is-referenced-by-count":1,"title":["A New Way to Introduce Knowledge into Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Pascal","family":"Garcia","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"16_CR1","unstructured":"Sutton, R.S., Barto, A.G.: Introduction to Reinforcement Learning. MIT Press\/Bradford Books (1998)"},{"key":"16_CR2","unstructured":"Singh, S.P., Jaakkola, T., Jordan, M.I.: Reinforcement Learning with Soft State Aggregation. NIPS 7, pp. 361\u2013368. MIT Press, Cambridge (1995)"},{"issue":"5","key":"16_CR3","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"J.N. Tsitsiklis","year":"1997","unstructured":"Tsitsiklis, J.N., Van Roy, B.: An analysis of temporal-difference learning with function approximation. IEEE Transactions on Automatic Control\u00a042(5), 674\u2013690 (1997)","journal-title":"IEEE Transactions on Automatic Control"},{"key":"16_CR4","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R.S. Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and Semi-MDPs: A Framework for Temporal Abstraction in Reinforcement Learning. Artificial Intelligence\u00a0112, 181\u2013211 (1999)","journal-title":"Artificial Intelligence"},{"key":"16_CR5","unstructured":"Parr, R.: Hierarchical control and learning for Markov decision processes. PhD thesis, University of California, Berkeley, California (1998)"},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Dietterich, T.G.: An Overview of MAXQ Hierarchical Reinforcement Learning. SARA, pp. 26\u201344 (2000)","DOI":"10.1007\/3-540-44914-0_2"},{"key":"16_CR7","volume-title":"Learning Macro-Actions in Reinforcement Learning","author":"J. Randlov","year":"1999","unstructured":"Randlov, J.: Learning Macro-Actions in Reinforcement Learning. NIPS 11. MIT Press, Cambridge (1999)"},{"key":"16_CR8","unstructured":"Stone, P., Sutton, R.S.: Scaling Reinforcement Learning Toward RoboCup Soccer. In: Proceedings of the 18th International Conference on Machine Learning (2001)"},{"key":"16_CR9","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1007\/3-540-36755-1_25","volume-title":"Machine Learning: ECML 2002","author":"I. Menache","year":"2002","unstructured":"Menache, I., Mannor, S., Shimki, N.: Q-Cut - Dynamic Discovery of Subgoals in Reinforcement Learning. In: Elomaa, T., Mannila, H., Toivonen, H. (eds.) ECML 2002. LNCS (LNAI), vol.\u00a02430, pp. 295\u2013306. Springer, Heidelberg (2002)"},{"key":"16_CR10","unstructured":"Watkins, C.J.C.H.: Learning from Delayed Rewards. PhD Thesis. University of Cambridge, England (1989)"},{"key":"16_CR11","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes. Wiley, New York (1994)"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Moore, A.W., Atkeson, C.G.: The Parti-game Algorithm for Variable Resolution Reinforcement Learning in Multidimensional State-spaces. In: Advances in Neural Information Processing Systems (1995)","DOI":"10.1007\/BF00993591"}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML 2003"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-39857-8_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,15]],"date-time":"2019-03-15T04:34:22Z","timestamp":1552624462000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-39857-8_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003]]},"ISBN":["9783540201212","9783540398578"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-39857-8_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2003]]}}}