{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,20]],"date-time":"2025-10-20T10:18:59Z","timestamp":1760955539248,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,11]]},"DOI":"10.1109\/humanoids.2014.7041417","type":"proceedings-article","created":{"date-parts":[[2015,2,18]],"date-time":"2015-02-18T00:28:49Z","timestamp":1424219329000},"page":"554-559","source":"Crossref","is-referenced-by-count":3,"title":["Efficient reuse of previous experiences in humanoid motor learning"],"prefix":"10.1109","author":[{"given":"Norikazu","family":"Sugimoto","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Voot","family":"Tangkaratt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thijs","family":"Wensveen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tingting","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masashi","family":"Sugiyama","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Morimoto","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/Humanoids.2011.6100908"},{"article-title":"Temporal Credit Assignment in Reinforcement Learning","year":"1984","author":"sutton","key":"ref11"},{"key":"ref12","doi-asserted-by":"crossref","DOI":"10.1109\/TNN.1998.712192","article-title":"Reinforcement Learning: An Introduction","author":"sutton","year":"1998"},{"key":"ref13","first-page":"538","article-title":"The optimal reward baseline for gradient-based reinforcement learning","author":"weaver","year":"2001","journal-title":"Processings of the Seventeeth Conference on Uncertainty in Artificial Intelligence"},{"article-title":"Toward a theory of reinforcement-learning connectionist systems","year":"1988","author":"williams","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2011.09.005"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1162\/NECO_a_00452"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/NECO_a_00199"},{"key":"ref4","first-page":"1471","article-title":"Variance reduction techniques for gradient estimates in reinforcement learning","volume":"5","author":"greensmith","year":"2004","journal-title":"Journal of Machine Learning Research"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2553-7"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2006.05.012"},{"key":"ref5","first-page":"1531","article-title":"A natural policy gradient","author":"kakade","year":"2002","journal-title":"Advances in Neural Information Processing Systems 14"},{"key":"ref8","article-title":"The SL simulation and real-time control software package","author":"schaal","year":"2009","journal-title":"Technical Report"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2006.282564"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1163\/156855307781389356"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2009.12.004"},{"key":"ref1","first-page":"12","article-title":"How Can A Robot Learn From Watching A Human?","author":"atkeson","year":"1997","journal-title":"Proceedings of the Fourteenth International Conference on Machine Learning"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.2.281"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2009.12.004"},{"key":"ref24","article-title":"Nonparametric Representation of Policies and Value Functions: A Trajectory-Based Approach","author":"atkeson","year":"2002","journal-title":"NIPS"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2013.7030010"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"}],"event":{"name":"2014 IEEE-RAS 14th International Conference on Humanoid Robots (Humanoids 2014)","start":{"date-parts":[[2014,11,18]]},"location":"Madrid, Spain","end":{"date-parts":[[2014,11,20]]}},"container-title":["2014 IEEE-RAS International Conference on Humanoid Robots"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7028729\/7041308\/07041417.pdf?arnumber=7041417","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,23]],"date-time":"2017-06-23T05:45:47Z","timestamp":1498196747000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7041417\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,11]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/humanoids.2014.7041417","relation":{},"subject":[],"published":{"date-parts":[[2014,11]]}}}