{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T10:07:48Z","timestamp":1763028468267,"version":"3.45.0"},"reference-count":7,"publisher":"Elsevier","isbn-type":[{"type":"print","value":"9781558602007"}],"license":[{"start":{"date-parts":[[1991,1,1]],"date-time":"1991-01-01T00:00:00Z","timestamp":662688000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[1991,1,1]],"date-time":"1991-01-01T00:00:00Z","timestamp":662688000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[1991]]},"DOI":"10.1016\/b978-1-55860-200-7.50076-3","type":"book-chapter","created":{"date-parts":[[2014,6,30]],"date-time":"2014-06-30T06:07:50Z","timestamp":1404108470000},"page":"368-372","source":"Crossref","is-referenced-by-count":7,"title":["Scaling Reinforcement Learning Techniques via Modularity"],"prefix":"10.1016","author":[{"given":"Lambert E.","family":"Wixson","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/B978-1-55860-200-7.50076-3_bib1","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1016\/0004-3702(87)90051-8","article-title":"\u201cPlanning as Search: A Quantitative Approach,\u201d","volume":"33","author":"Korf","year":"1987","journal-title":"Artificial Intelligence"},{"key":"10.1016\/B978-1-55860-200-7.50076-3_bib2","unstructured":"Sridhar Mahadevan and Jonathan Connell, \u201cAutomatic Programming of Behavior-based Robots using Reinforcement Learning,\u201d Technical report, I.B.M. T.J. Watson Research Center, December 1990."},{"key":"10.1016\/B978-1-55860-200-7.50076-3_bib3","doi-asserted-by":"crossref","unstructured":"Richard S. 
Sutton, \u201cIntegrated Architectures for Learning, Planning, and Reacting Based on Approximating Dynamic Programming,\u201d In Proceedings of the Seventh International Conference on Machine Learning, June 1990.","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"10.1016\/B978-1-55860-200-7.50076-3_bib4","unstructured":"C.J.C.H. Watkins, Learning from Delayed Rewards, PhD thesis, Cambridge University Psychology Dept., 1989."},{"key":"10.1016\/B978-1-55860-200-7.50076-3_bib5","unstructured":"Steven D. Whitehead and Dana H. Ballard, \u201cLearning to Perceive and Act,\u201d Technical Report 331, University of Rochester Computer Science Dept., June 1990."},{"key":"10.1016\/B978-1-55860-200-7.50076-3_bib6","unstructured":"Steven D. Whitehead and Dana H. Ballard, \u201cA study of cooperative mechanisms for faster reinforcement learning,\u201d Technical Report 365, University of Rochester Computer Science Dept., 1991."},{"key":"10.1016\/B978-1-55860-200-7.50076-3_bib7","unstructured":"Lambert E. Wixson and Dana H. 
Ballard, \u201cLearning to Find Objects,\u201d Technical report, University of Rochester Computer Science Dept., 1991, in preparation."}],"container-title":["Machine Learning Proceedings 1991"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:B9781558602007500763?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:B9781558602007500763?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T10:02:28Z","timestamp":1763028148000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/B9781558602007500763"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1991]]},"ISBN":["9781558602007"],"references-count":7,"URL":"https:\/\/doi.org\/10.1016\/b978-1-55860-200-7.50076-3","relation":{},"subject":[],"published":{"date-parts":[[1991]]}}}