{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T09:07:38Z","timestamp":1766135258759,"version":"3.41.0"},"reference-count":17,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[1998,3,1]],"date-time":"1998-03-01T00:00:00Z","timestamp":888710400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[1998,3,1]],"date-time":"1998-03-01T00:00:00Z","timestamp":888710400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Journal of Intelligent and Robotic Systems"],"published-print":{"date-parts":[[1998,3]]},"DOI":"10.1023\/a:1007904418265","type":"journal-article","created":{"date-parts":[[2002,12,22]],"date-time":"2002-12-22T12:13:40Z","timestamp":1040559220000},"page":"221-238","source":"Crossref","is-referenced-by-count":7,"title":["Robot Control Optimization Using Reinforcement Learning"],"prefix":"10.1007","volume":"21","author":[{"given":"Kai-Tai","family":"Song","sequence":"first","affiliation":[]},{"given":"Wen-Yu","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"150354_CR1","volume-title":"Model Based Control of a Robot Manipulator","author":"C. H. An","year":"1988","unstructured":"An, C. H., Atkeson, C. G., and Hollerbach, J. M.: Model Based Control of a Robot Manipulator, MIT Press, Cambridge, MA, 1988."},{"issue":"3","key":"150354_CR2","first-page":"220","volume":"97","author":"J. S. Albus","year":"1975","unstructured":"Albus, J. S.: A new approach to manipulator control: the cerebellar model articulation controller (CMAC), Trans. of ASME, Series G\n97(3) (1975), 220\u2013227.","journal-title":"Trans. of ASME, Series G"},{"key":"150354_CR3","volume-title":"Neural Networks for Control","author":"A. G. Barto","year":"1990","unstructured":"Barto, A. G.: Connectionist learning for control, in: W. T. Miller, R. Sutton, and P. Werbos (eds), Neural Networks for Control, MIT Press, Cambridge, MA, 1990."},{"issue":"3","key":"150354_CR4","doi-asserted-by":"crossref","first-page":"360","DOI":"10.1109\/TSMC.1985.6313371","volume":"15","author":"A. G. Barto","year":"1985","unstructured":"Barto, A. G. and Anandan, P.: Pattern-recognizing stochastic learning automata, IEEE Trans. Systems Man Cybernet.\n15(3) (1985), 360\u2013375.","journal-title":"IEEE Trans. Systems Man Cybernet."},{"issue":"1","key":"150354_CR5","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1109\/37.257890","volume":"14","author":"V. Gullapalli","year":"1994","unstructured":"Gullapalli, V., Franklin, J. A., and Benbrahim, H.: Acquiring robot skills via reinforcement learning, IEEE Control Systems\n14(1) (1994), 13\u201324.","journal-title":"IEEE Control Systems"},{"key":"150354_CR6","doi-asserted-by":"crossref","unstructured":"Gullapalli, V.: Associative reinforcement learning of real-valued functions, in: Proc. of the IEEE Int. Conf. on Systems Man Cybernet., 1991, pp. 1453\u20131458.","DOI":"10.1109\/ICSMC.1991.169893"},{"key":"150354_CR7","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1016\/0893-6080(90)90056-Q","volume":"3","author":"V. Gullapalli","year":"1990","unstructured":"Gullapalli, V.: A stochastic reinforcement learning algorithm for learning real-valued functions, Neural Networks\n3 (1990), 671\u2013692.","journal-title":"Neural Networks"},{"key":"150354_CR8","unstructured":"Michie, D. and Chambers, R. A.: BOXES: an experiment in adaptive control, in: E. Dale and D. Michie (eds), Machine Intelligence 2, 1986, pp. 137\u2013152."},{"issue":"1","key":"150354_CR9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/70.88112","volume":"6","author":"W. T. Miller","year":"1990","unstructured":"Miller, W. T., Hewes, R. P., Glanz, F. H., and Kraft, L. G.: Real-time dynamic control of an industrial manipulator using a neural-network-based learning controller, IEEE Trans. Robot. Automat.\n6(1) (1990), 1\u20139.","journal-title":"IEEE Trans. Robot. Automat."},{"key":"150354_CR10","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1109\/TSMC.1974.5408453","volume":"14","author":"K. S. Narendra","year":"1974","unstructured":"Narendra, K. S. and Thathachar, M. A. L.: Learning automata \u2013 a survey, IEEE Trans. Systems Man Cybernet.\n14 (1974), 323\u2013334.","journal-title":"IEEE Trans. Systems Man Cybernet."},{"key":"150354_CR11","doi-asserted-by":"crossref","first-page":"126","DOI":"10.1115\/1.3139652","volume":"102","author":"M. H. Raibert","year":"1981","unstructured":"Raibert, M. H. and Craig, J.: Hybrid position\/force control of manipulators, Trans. ASME J. Dyn. Systems Meas. Control\n102 (1981), 126\u2013133.","journal-title":"Trans. ASME J. Dyn. Systems Meas. Control"},{"key":"150354_CR12","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/5236.001.0001","volume-title":"Parallel Distributed Processing","author":"D. E. Rumelhart","year":"1986","unstructured":"Rumelhart, D. E., Hinton, G. E., and Williams, R. J.: Parallel Distributed Processing, MIT Press, Cambridge, MA, 1986."},{"key":"150354_CR13","unstructured":"Song, K. T. and Chu, T. S.: An experimental study of force tracking control by reinforcement learning, in: Proc. 1994 Internat. Symp. on Artificial Neural Networks, Taiwan, 1994, pp. 728\u2013734."},{"key":"150354_CR14","unstructured":"Sun, W. Y.: Control design and experimental study of a robot using reinforcement learning, Master thesis, National Chiao Tung Univ., 1995."},{"issue":"1","key":"150354_CR15","first-page":"9","volume":"3","author":"R. S. Sutton","year":"1988","unstructured":"Sutton, R. S.: Learning to predict by the method of temporal difference, Machine Learning\n3(1) (1988), 9\u201344.","journal-title":"Machine Learning"},{"key":"150354_CR16","doi-asserted-by":"crossref","unstructured":"Werbos, P. J.: Generalization of back propagation with application to a recurrent gas market model, Neural Networks\n1 (October, 1988), 339\u2013356.","DOI":"10.1016\/0893-6080(88)90007-X"},{"key":"150354_CR17","volume-title":"Adaptive Signal Processing","author":"B. Widrow","year":"1985","unstructured":"Widrow, B. and Stearns, S. D.: Adaptive Signal Processing, Prentice-Hall, Englewood Cliffs, NJ, 1985."}],"container-title":["Journal of Intelligent and Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1007904418265.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1007904418265\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1007904418265.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,6]],"date-time":"2025-06-06T14:27:41Z","timestamp":1749220061000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1007904418265"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1998,3]]},"references-count":17,"journal-issue":{"issue":"3","published-print":{"date-parts":[[1998,3]]}},"alternative-id":["150354"],"URL":"https:\/\/doi.org\/10.1023\/a:1007904418265","relation":{},"ISSN":["0921-0296","1573-0409"],"issn-type":[{"type":"print","value":"0921-0296"},{"type":"electronic","value":"1573-0409"}],"subject":[],"published":{"date-parts":[[1998,3]]}}}