{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T12:46:53Z","timestamp":1774874813875,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":12,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642049200","type":"print"},{"value":"9783642049217","type":"electronic"}],"license":[{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-04921-7_39","type":"book-chapter","created":{"date-parts":[[2009,9,22]],"date-time":"2009-09-22T11:46:25Z","timestamp":1253619985000},"page":"380-390","source":"Crossref","is-referenced-by-count":11,"title":["A Cat-Like Robot Real-Time Learning to Run"],"prefix":"10.1007","author":[{"given":"Pawe\u0142","family":"Wawrzy\u0144ski","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"39_CR1","doi-asserted-by":"crossref","unstructured":"Bartlett, P.L., Baxter, J.: Stochastic optimization of controlled partially observable markov decision processes. In: Proc. of the 39th IEEE Conf. on Decision and Control (CDC 2000), vol.\u00a01, pp. 124\u2013129 (2000)","DOI":"10.1109\/CDC.2000.912744"},{"key":"39_CR2","first-page":"834","volume":"13","author":"A.G. Barto","year":"1983","unstructured":"Barto, A.G., Sutton, R.S., Anderson, C.W.: Neuronlike adaptive elements that can learn difficult learning control problems. IEEE Trans. on SMC\u00a013, 834\u2013846 (1983)","journal-title":"IEEE Trans. on SMC"},{"key":"39_CR3","unstructured":"Bhatnagar, S., Sutton, R.S., Ghavamzadeh, M., Lee, M.: Incremental natural actor-critic algorithms. In: Advances in NIPS, vol.\u00a021 (2008)"},{"key":"39_CR4","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1080\/019697299125127","volume":"30","author":"P. Cichosz","year":"1999","unstructured":"Cichosz, P.: An analysis of experience replay in temporal difference learning. Cybernetics and Systems\u00a030, 341\u2013363 (1999)","journal-title":"Cybernetics and Systems"},{"key":"39_CR5","unstructured":"Kimura, H., Kobayashi, S.: An analysis of actor\/critic algorithm using eligibility traces: Reinforcement learning with imperfect value functions. In: Proc. of the 15th ICML, pp. 278\u2013286 (1998)"},{"issue":"4","key":"39_CR6","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V. Konda","year":"2003","unstructured":"Konda, V., Tsitsiklis, J.: Actor-critic algorithms. SIAM Journal on Control and Optimization\u00a042(4), 1143\u20131166 (2003)","journal-title":"SIAM Journal on Control and Optimization"},{"key":"39_CR7","unstructured":"Lin, L.-J.: Reinforcement learning for robots using neural networks. Ph.D thesis, Carnegie Mellon University, Pittsburgh, PA, USA (1992)"},{"issue":"2-3","key":"39_CR8","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1016\/0004-3702(92)90058-6","volume":"55","author":"S. Mahadevan","year":"1992","unstructured":"Mahadevan, S., Connell, J.: Automatic programming of behavior-based robots using reinforcement learning. Artificial Intelligence\u00a055(2-3), 311\u2013365 (1992)","journal-title":"Artificial Intelligence"},{"key":"39_CR9","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"39_CR10","first-page":"279","volume":"8","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., Dayan, P.: Q-learning. Machine Learning\u00a08, 279\u2013292 (1992)","journal-title":"Machine Learning"},{"key":"39_CR11","doi-asserted-by":"crossref","unstructured":"Wawrzy\u0144ski, P.: Learning to control a 6-degree-of-freedom walking robot. In: Proc. of EUROCON 2007, pp. 698\u2013705 (2007)","DOI":"10.1109\/EURCON.2007.4400335"},{"key":"39_CR12","unstructured":"Wawrzy\u0144ski, P., Pacut, A.: Truncated importance sampling for reinforcement learning with experience replay. In: Proc. CSIT Int. Multiconf., pp. 305\u2013315 (2007)"}],"container-title":["Lecture Notes in Computer Science","Adaptive and Natural Computing Algorithms"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-04921-7_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,22]],"date-time":"2019-05-22T19:27:34Z","timestamp":1558553254000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-04921-7_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642049200","9783642049217"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-04921-7_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009]]}}}