{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T19:00:55Z","timestamp":1773342055589,"version":"3.50.1"},"reference-count":19,"publisher":"Elsevier","isbn-type":[{"value":"9781558602472","type":"print"}],"license":[{"start":{"date-parts":[[1992,1,1]],"date-time":"1992-01-01T00:00:00Z","timestamp":694224000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[1992]]},"DOI":"10.1016\/b978-1-55860-247-2.50017-6","type":"book-chapter","created":{"date-parts":[[2014,7,1]],"date-time":"2014-07-01T02:52:37Z","timestamp":1404183157000},"page":"92-101","source":"Crossref","is-referenced-by-count":51,"title":["A Teaching Method for Reinforcement Learning"],"prefix":"10.1016","author":[{"given":"Jeffery A.","family":"Clouse","sequence":"first","affiliation":[]},{"given":"Paul E.","family":"Utgoff","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib1","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1109\/37.24809","article-title":"Learning to control an inverted pendulum using neural networks","volume":"9","author":"Anderson","year":"1989","journal-title":"IEEE Control Systems Magazine"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib2","first-page":"835","article-title":"Neuronlike adaptive elements that can solve difficult learning control problems","volume":"13","author":"Barto","year":"1983","journal-title":"IEEE Transactions on Systems, Man and Cybernetics"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib3","series-title":"Learning and Computational Neuroscience","article-title":"Learning and sequential decision making","author":"Barto","year":"1990"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib4","unstructured":"Barto, A. G., Bradtke, S. J., & Singh, S. P. (1991). Real-time learning and control using asynchronous dynamic programming, (COINS Technical Report 91\u201357), Amherst, MA: University of Massachusetts, Department of Computer and Information Science."},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib5","series-title":"Dynamic programming","author":"Bellman","year":"1957"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib6","doi-asserted-by":"crossref","first-page":"330","DOI":"10.1111\/j.1467-8640.1987.tb00219.x","article-title":"Learning to control a dynamical physical system","volume":"3","author":"Connell","year":"1987","journal-title":"Computational Intelligence"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib7","doi-asserted-by":"crossref","first-page":"108","DOI":"10.1038\/scientificamerican0173-108","article-title":"Mathematical games","volume":"228","author":"Gardner","year":"1973","journal-title":"Scientific American"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib8","doi-asserted-by":"crossref","unstructured":"Gullapalli, Vijaykumar (1991). A comparison of supervised and reinforcement learning methods on a reinforcement learning task. Proceedings of the 1991 IEEE International Symposium on Intelligent Control (pp. 394\u2013399). Arlington, VA.","DOI":"10.1109\/ISIC.1991.187390"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib9","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1007\/BF00336991","article-title":"Linear function neurons: Structure and training","volume":"53","author":"Hampson","year":"1986","journal-title":"Biological Cybernetics"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib10","series-title":"Advances in Neural Information Processing Systems","article-title":"Learning to control an unstable system with forward modeling","author":"Jordan","year":"1990"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib11","unstructured":"Lin, Long-Ji (1991). Programming robots using reinforcement learning and teaching. Proceedings of the Ninth National Conference on Artificial Intelligence (pp. 781\u2013786). Anaheim, CA: MIT Press."},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib12","article-title":"BOXES: An experiment in adaptive control","volume":"2","author":"Michie","year":"1968"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib13","series-title":"Learning machines","author":"Nilsson","year":"1965"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib14","unstructured":"Selfridge, O., Sutton, R. S., & Barto, A. G. (1985). Training and tracking in robotics. Proceedings of the Ninth International Joint Conference on Artificial Intelligence (pp. 670\u2013672). Los Angeles, CA: Morgan Kaufmann."},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib15","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/BF00115009","article-title":"Learning to predict by the method of temporal differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Machine Learning"},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib16","unstructured":"Utgoff, P. E., & Clouse, J. A. (1991). Two kinds of training information for evaluation function learning. Proceedings of the Ninth National Conference on Artificial Intelligence (pp. 596\u2013600). Anaheim, CA: MIT Press."},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib17","unstructured":"Watkins, C. J. C. H. (1989). Learning with delayed rewards. Doctoral dissertation, Psychology Department, Cambridge University."},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib18","unstructured":"Whitehead, Steven, D. (1991). A complexity analysis of cooperative mechanisms in reinforcement learning. Proceedings of the Ninth National Conference on Artificial Intelligence (pp. 607\u2013613). Anaheim, CA: MIT Press."},{"key":"10.1016\/B978-1-55860-247-2.50017-6_bib19","unstructured":"Widrow, B., & Smith, F. W. (1964). Pattern-recognizing control systems. In Tou & Wilcox (Eds.), Computer and Information Sciences Proceedings. Washington, D.C: Spartan Books."}],"container-title":["Machine Learning Proceedings 1992"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:B9781558602472500176?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:B9781558602472500176?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2018,10,2]],"date-time":"2018-10-02T07:59:30Z","timestamp":1538467170000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/B9781558602472500176"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1992]]},"ISBN":["9781558602472"],"references-count":19,"URL":"https:\/\/doi.org\/10.1016\/b978-1-55860-247-2.50017-6","relation":{},"subject":[],"published":{"date-parts":[[1992]]}}}