{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T07:54:29Z","timestamp":1767772469502,"version":"3.37.3"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Syst. Man Cybern, Syst."],"published-print":{"date-parts":[[2021,1]]},"DOI":"10.1109\/tsmc.2020.3041775","type":"journal-article","created":{"date-parts":[[2020,12,24]],"date-time":"2020-12-24T20:57:16Z","timestamp":1608843436000},"page":"40-50","source":"Crossref","is-referenced-by-count":34,"title":["Looking Back on the Actor\u2013Critic Architecture"],"prefix":"10.1109","volume":"51","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3581-1896","authenticated-orcid":false,"given":"Andrew G.","family":"Barto","sequence":"first","affiliation":[]},{"given":"Richard S.","family":"Sutton","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7392-3840","authenticated-orcid":false,"given":"Charles W.","family":"Anderson","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Learning internal representations by error propagation","author":"rumelhart","year":"1986","journal-title":"Parallel Distributed Processing Explorations in the Microstructure of Cognition Vol 1 Foundations"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313193"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1093\/comjnl\/6.3.232"},{"key":"ref32","first-page":"129","article-title":"Trial and error","author":"michie","year":"1961","journal-title":"Science Survey Part 2"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1037\/h0049039"},{"key":"ref30","first-page":"670","article-title":"Training and tracking in robotics","author":"selfridge","year":"1985","journal-title":"Proc 9th Int Joint Conf Artif Intell"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-1895-9_16"},{"key":"ref36","first-page":"229","article-title":"Learning by statistical cooperation of self-interested neuron-like computing elements","volume":"4","author":"barto","year":"1985","journal-title":"Hum Neurobiol"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-934613-41-5.50014-3"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1063\/1.36249"},{"key":"ref28","first-page":"511","article-title":"Pandemonium: A paradigm for learning","author":"selfridge","year":"1959","journal-title":"Proc Symp Mech Thought Process"},{"key":"ref27","first-page":"288","article-title":"Pattern-recognizing control systems","author":"widrow","year":"1964","journal-title":"Computer and Information Sciences"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/66.3.561"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"journal-title":"&#x201C;Memo&#x201D; Functions and the Pop-2 Language","year":"1967","author":"popplestone","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.88.2.135"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1973.4309272"},{"article-title":"Temporal credit assignment in reinforcement learning","year":"1984","author":"sutton","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/0166-4328(82)90001-8"},{"key":"ref26","first-page":"137","article-title":"BOXES: An experiment in adaptive control","author":"michie","year":"1968","journal-title":"Machine Intelligence"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1126\/science.275.5306.1593"},{"key":"ref51","first-page":"215","article-title":"Adaptive critics and the basal ganglia","author":"barto","year":"1995","journal-title":"Models of Information Processing in the Basal Ganglia"},{"journal-title":"Superintelligence Paths Dangers Strategies","year":"2014","author":"bostrom","key":"ref56"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/3316.001.0001"},{"key":"ref54","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-27645-3","author":"wiering","year":"2012","journal-title":"Reinforcement Learning State-of-the-Art"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1037\/\/0033-295X.84.5.413"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-96384-1"},{"key":"ref40","first-page":"ii629","article-title":"Gradient following without back-propagation in layered networks","author":"barto","year":"1987","journal-title":"Proc IEEE 1st Annu Conf Neural Network"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/BF00453370"},{"journal-title":"Automaton Theory and Modeling of Biological Systems","year":"1973","author":"tsetlin","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1974.5408453"},{"journal-title":"Learning Automata An Introduction","year":"1989","author":"narendra","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/0042-6989(74)90024-8"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/BF00337061"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-015-3711-7"},{"journal-title":"Memo Functions A Language Feature With &#x201C;Rote-Learning&#x201D; Properties","year":"1967","author":"michie","key":"ref19"},{"journal-title":"The Hedonistic Neuron A Theory of Memory Learning and Intelligence","year":"1982","author":"klopf","key":"ref4"},{"article-title":"Brain function and adaptive systems&#x2014;A heterostatic theory","year":"1972","author":"klopf","key":"ref3"},{"article-title":"Goal seeking components for adaptive intelligence: An initial assessment","year":"1981","author":"barto","key":"ref6"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1037\/11592-000"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/BF02478259"},{"journal-title":"Animal Intelligence","year":"1911","author":"thorndike","key":"ref7"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1152\/jn.1998.80.1.1"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/BF00365229"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.07.008"},{"article-title":"Approximating a policy can be easier than approximating a value function","year":"2000","author":"anderson","key":"ref48"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"990","DOI":"10.1109\/IJCNN.2001.939495","article-title":"Action-dependent adaptive critic designs","volume":"2","author":"liu","year":"2001","journal-title":"Proc Int Joint Conf Neural Netw (Cat No 01CH37222)"},{"article-title":"Learning and problem solving with multilayer connectionist systems","year":"1986","author":"anderson","key":"ref42"},{"article-title":"Feature generation and selection by a layered network of reinforcement learning elements: Some initial experiments","year":"1982","author":"anderson","key":"ref41"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/37.24809"}],"container-title":["IEEE Transactions on Systems, Man, and Cybernetics: Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221021\/9321226\/09306925.pdf?arnumber=9306925","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T15:57:41Z","timestamp":1642003061000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9306925\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1]]},"references-count":56,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tsmc.2020.3041775","relation":{},"ISSN":["2168-2216","2168-2232"],"issn-type":[{"type":"print","value":"2168-2216"},{"type":"electronic","value":"2168-2232"}],"subject":[],"published":{"date-parts":[[2021,1]]}}}