{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,2]],"date-time":"2022-04-02T13:51:14Z","timestamp":1648907474375},"reference-count":36,"publisher":"Elsevier BV","issue":"3","license":[{"start":{"date-parts":[[1995,12,1]],"date-time":"1995-12-01T00:00:00Z","timestamp":817776000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[1995,12]]},"DOI":"10.1016\/0925-2312(95)00035-x","type":"journal-article","created":{"date-parts":[[2002,7,26]],"date-time":"2002-07-26T02:26:33Z","timestamp":1027650393000},"page":"271-292","source":"Crossref","is-referenced-by-count":15,"title":["Direct associative reinforcement learning methods for dynamic systems control"],"prefix":"10.1016","volume":"9","author":[{"given":"Vijaykumar","family":"Gullapalli","sequence":"first","affiliation":[]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/0925-2312(95)00035-X_BIB1","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1109\/37.24809","article-title":"Learning to control an inverted pendulum using neural networks","volume":"9","author":"Anderson","year":"1989","journal-title":"IEEE Control Systems Mag."},{"key":"10.1016\/0925-2312(95)00035-X_BIB2","article-title":"Connectionist learning for control: An overview","author":"Barto","year":"1989"},{"key":"10.1016\/0925-2312(95)00035-X_BIB3","article-title":"Some learning tasks from a control perspective","author":"Barto","year":"1991"},{"key":"10.1016\/0925-2312(95)00035-X_BIB4","doi-asserted-by":"crossref","first-page":"360","DOI":"10.1109\/TSMC.1985.6313371","article-title":"Pattern recognizing stochastic learning automata","volume":"15","author":"Barto","year":"1985","journal-title":"IEEE Trans. Systems, Man, and Cybernet."},{"key":"10.1016\/0925-2312(95)00035-X_BIB5","first-page":"835","article-title":"Neuronlike elements that can solve difficult learning control problems","volume":"13","author":"Barto","year":"1983","journal-title":"IEEE Trans. Systems, Man, and Cybernet."},{"key":"10.1016\/0925-2312(95)00035-X_BIB6","article-title":"Principles of optimalizing control systems and an application to an internal combustion engine","author":"Draper","year":"1951","journal-title":"ASME Publications"},{"key":"10.1016\/0925-2312(95)00035-X_BIB7","author":"Duda","year":"1973"},{"key":"10.1016\/0925-2312(95)00035-X_BIB8","first-page":"39","article-title":"On stochastic approximation","volume":"vol. 1","author":"Dvoretzky","year":"1956"},{"key":"10.1016\/0925-2312(95)00035-X_BIB9","author":"Goodwin","year":"1984"},{"key":"10.1016\/0925-2312(95)00035-X_BIB10","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1016\/0893-6080(90)90056-Q","article-title":"A stochastic reinforcement learning algorithm for learning real-valued functions","volume":"3","author":"Gullapalli","year":"1990","journal-title":"Neural Networks"},{"key":"10.1016\/0925-2312(95)00035-X_BIB11","series-title":"Proc. 1991 IEEE Int. Symp. on Intelligent Control","first-page":"394","article-title":"A comparison of supervised and reinforcement learning methods on a reinforcement learning task","author":"Gullapalli","year":"1991"},{"key":"10.1016\/0925-2312(95)00035-X_BIB12","article-title":"Reinforcement learning and its application to control","author":"Gullapalli","year":"1992"},{"key":"10.1016\/0925-2312(95)00035-X_BIB13","series-title":"Proc. 1992 IEEE Int. Conf. on Robotics and Automation","first-page":"1475","article-title":"Learning reactive admittance control","author":"Gullapalli","year":"1992"},{"key":"10.1016\/0925-2312(95)00035-X_BIB14","series-title":"Proc. 1991 IEEE Int. Symp. on Intelligent Control","first-page":"388","article-title":"Goal-directed encoding of task knowledge for robotic skill acquisition","author":"Handelman","year":"1991"},{"key":"10.1016\/0925-2312(95)00035-X_BIB15","article-title":"Supervised learning and systems with excess degrees of freedom","author":"Jordan","year":"1988"},{"key":"10.1016\/0925-2312(95)00035-X_BIB16","series-title":"Attention and Performance, XIII","article-title":"Indeterminate motor skill learning problems","author":"Jordan","year":"1990"},{"key":"10.1016\/0925-2312(95)00035-X_BIB17","series-title":"Advances in Neural Information Processing Systems II","article-title":"Learning to control an unstable system with forward modeling","author":"Jordan","year":"1990"},{"key":"10.1016\/0925-2312(95)00035-X_BIB18","series-title":"Foundations of Cognitive Science","article-title":"Action","author":"Jordan","year":"1989"},{"key":"10.1016\/0925-2312(95)00035-X_BIB19","article-title":"Forward models: Supervised learning with a distal teacher","author":"Jordan","year":"1990"},{"key":"10.1016\/0925-2312(95)00035-X_BIB20","series-title":"Neural Networks for Control","article-title":"Computational schemes and neural network models for formation and control of multijoint arm trajectory","author":"Kawato","year":"1990"},{"issue":"3","key":"10.1016\/0925-2312(95)00035-X_BIB21","doi-asserted-by":"crossref","DOI":"10.1214\/aoms\/1177729392","article-title":"Stochastic estimation of the maximum of a regression function","volume":"23","author":"Kiefer","year":"1952","journal-title":"Ann. Math. Stat."},{"key":"10.1016\/0925-2312(95)00035-X_BIB22","article-title":"Neural nets and robotic control","author":"Mars","year":"1988"},{"key":"10.1016\/0925-2312(95)00035-X_BIB23","series-title":"Adaptive, Learning and Pattern Recognition Systems: Theory and Applications","first-page":"287","article-title":"Reinforcement learning control and pattern recognition systems","author":"Mendel","year":"1970"},{"key":"10.1016\/0925-2312(95)00035-X_BIB24","article-title":"The learning and planning of actions","author":"Miyata","year":"1988"},{"key":"10.1016\/0925-2312(95)00035-X_BIB25","series-title":"Proc. Ninth Annual Conf. of the Cognitive Science Society","first-page":"165","article-title":"A dual back-propagation scheme for scalar reward learning","author":"Munro","year":"1987"},{"key":"10.1016\/0925-2312(95)00035-X_BIB26","author":"Narendra","year":"1989"},{"key":"10.1016\/0925-2312(95)00035-X_BIB27","series-title":"Neural Networks for Control","article-title":"The truck backer-upper: An example of self-learning in neural networks","author":"Nguyen","year":"1990"},{"issue":"1","key":"10.1016\/0925-2312(95)00035-X_BIB28","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","article-title":"A stochastic approximation method","volume":"22","author":"Robbins","year":"1951","journal-title":"Ann. Math. Stat."},{"key":"10.1016\/0925-2312(95)00035-X_BIB29","doi-asserted-by":"crossref","first-page":"1109","DOI":"10.1109\/TAC.1982.1103060","article-title":"A self-learning automaton with variable resolution for high precision assembly by industrial robots","volume":"27","author":"Simons","year":"1982","journal-title":"IEEE Trans. Automatic Control"},{"key":"10.1016\/0925-2312(95)00035-X_BIB30","article-title":"Temporal credit assignment in reinforcement learning","author":"Sutton","year":"1984"},{"key":"10.1016\/0925-2312(95)00035-X_BIB31","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/BF00115009","article-title":"Learning to predict by the methods of temporal differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Machine Learning"},{"key":"10.1016\/0925-2312(95)00035-X_BIB32","author":"Tsetlin","year":"1973"},{"key":"10.1016\/0925-2312(95)00035-X_BIB33","article-title":"Learning from delayed rewards","author":"Watkins","year":"1989"},{"key":"10.1016\/0925-2312(95)00035-X_BIB34","series-title":"Proc. 1989 Int. Joint Conf. on Neural Networks","article-title":"Backpropagation and neurocontrol: A review and prospectus","author":"Werbos","year":"1989"},{"key":"10.1016\/0925-2312(95)00035-X_BIB35","series-title":"Neural Networks for Control","article-title":"A menu of designs for reinforcement learning over time","author":"Werbos","year":"1990"},{"key":"10.1016\/0925-2312(95)00035-X_BIB36","series-title":"Proc. IEEE Int. Conf. on Neural Networks","article-title":"On the use of backpropagation in associative reinforcement learning","author":"Williams","year":"1988"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:092523129500035X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:092523129500035X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,4,14]],"date-time":"2019-04-14T16:25:41Z","timestamp":1555259141000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/092523129500035X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1995,12]]},"references-count":36,"journal-issue":{"issue":"3","published-print":{"date-parts":[[1995,12]]}},"alternative-id":["092523129500035X"],"URL":"https:\/\/doi.org\/10.1016\/0925-2312(95)00035-x","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[1995,12]]}}}