{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T22:11:41Z","timestamp":1767651101932},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2009,8,1]],"date-time":"2009-08-01T00:00:00Z","timestamp":1249084800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Auton Robot"],"published-print":{"date-parts":[[2009,8]]},"DOI":"10.1007\/s10514-009-9133-z","type":"journal-article","created":{"date-parts":[[2009,8,31]],"date-time":"2009-08-31T13:14:38Z","timestamp":1251724478000},"page":"131-144","source":"Crossref","is-referenced-by-count":31,"title":["Nonparametric representation of an approximated Poincar\u00e9 map for learning biped locomotion"],"prefix":"10.1007","volume":"27","author":[{"given":"Jun","family":"Morimoto","sequence":"first","affiliation":[]},{"given":"Christopher G.","family":"Atkeson","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,9,1]]},"reference":[{"key":"9133_CR1","first-page":"1","volume-title":"Proceedings of the 23rd international conference on machine learning","author":"P. Abbeel","year":"2006","unstructured":"Abbeel, P., Quigley, M., & Ng, A. Y. (2006). Using inaccurate models in reinforcement learning. In Proceedings of the 23rd international conference on machine learning (pp.\u00a01\u20138). New York: ACM."},{"key":"9133_CR2","first-page":"1008","volume-title":"Advances in neural information processing systems 10","author":"C. G. Atkeson","year":"1998","unstructured":"Atkeson, C. G. (1998). Nonparametric model-based reinforcement learning. In M.I. Jordan, M. Kearns & S. Solla (Eds.), Advances in neural information processing systems 10 (pp.\u00a01008\u20131014). Cambridge: MIT."},{"key":"9133_CR3","first-page":"12","volume-title":"Proc. 14th international conference on machine learning","author":"C. G. Atkeson","year":"1997","unstructured":"Atkeson, C. G., & Schaal, S. (1997). Robot learning from demonstration. In Proc. 14th international conference on machine learning (pp.\u00a012\u201320). San Mateo: Morgan Kaufmann."},{"key":"9133_CR4","unstructured":"Bagnell, A., & Schneider, J. (2003). Covariant policy search. In Proceedings of the eighteenth international joint conference on artificial intelligence (pp.\u00a01019\u20131024)."},{"key":"9133_CR5","first-page":"968","volume-title":"Advances in neural information processing systems 11","author":"L. C. Baird","year":"1999","unstructured":"Baird, L. C., & Moore, A. W. (1999). Gradient descent for general reinforcement learning. In Advances in neural information processing systems 11 (pp.\u00a0968\u2013974). Cambridge: MIT."},{"key":"9133_CR6","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1016\/S0921-8890(97)00043-2","volume":"22","author":"H. Benbrahim","year":"1997","unstructured":"Benbrahim, H., & Franklin, J. (1997). Biped dynamic walking using reinforcement learning. Robotics and Autonomous Systems, 22, 283\u2013302.","journal-title":"Robotics and Autonomous Systems"},{"key":"9133_CR7","volume-title":"Pattern recognition and machine learning","author":"C. M. Bishop","year":"2006","unstructured":"Bishop, C. M. (2006). Pattern recognition and machine learning. New York: Springer."},{"key":"9133_CR8","doi-asserted-by":"crossref","unstructured":"Byl, K., & Tedrake, R. (2008). Metastable walking on stochastically rough terrain. In Proceedings of robotics: science and systems IV, Zurich, Switzerland, June 2008.","DOI":"10.15607\/RSS.2008.IV.030"},{"key":"9133_CR9","first-page":"1939","volume":"6","author":"J. Q. Candela","year":"2005","unstructured":"Candela, J. Q., & Rasmussen, C. E. (2005). A unifying view of sparse approximate Gaussian process regression. Journal of Machine Learning Research, 6, 1939\u20131959.","journal-title":"Journal of Machine Learning Research"},{"key":"9133_CR10","first-page":"457","volume-title":"Proceedings of fifteenth conference on uncertainty in artificial intelligence","author":"R. Dearden","year":"1999","unstructured":"Dearden, R., Friedman, N., & Andre, D. (1999). Model based Bayesian exploration. In Proceedings of fifteenth conference on uncertainty in artificial intelligence (pp.\u00a0457\u2013464). San Francisco: Morgan Kaufmann."},{"key":"9133_CR11","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1007\/s004220050558","volume":"82","author":"R. Q. V. Linde der","year":"1999","unstructured":"der Linde, R. Q. V. (1999). Passive bipedal walking with phasic muscle contraction. Biological Cybernetics, 82, 227\u2013237.","journal-title":"Biological Cybernetics"},{"issue":"1","key":"9133_CR12","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1162\/089976600300015961","volume":"12","author":"K. Doya","year":"2000","unstructured":"Doya, K. (2000). Reinforcement learning in continuous time and space. Neural Computation, 12(1), 219\u2013245.","journal-title":"Neural Computation"},{"issue":"2","key":"9133_CR13","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1177\/0278364907084980","volume":"27","author":"G. Endo","year":"2008","unstructured":"Endo, G., Morimoto, J., Matsubara, T., Nakanishi, J., & Cheng, G. (2008). Learning CPG-based biped locomotion with a policy gradient method: application to a humanoid robot. International Journal of Robotics Research, 27(2), 213\u2013228.","journal-title":"International Journal of Robotics Research"},{"key":"9133_CR14","doi-asserted-by":"crossref","first-page":"457","DOI":"10.7551\/mitpress\/7503.003.0062","volume-title":"Advances in neural information processing systems 19","author":"M. Ghavamzadeh","year":"2007","unstructured":"Ghavamzadeh, M., & Engel, Y. (2007). Bayesian policy gradient algorithms. In B. Scholkopf, J. Platt & T. Hofmann (Eds.), Advances in neural information processing systems 19 (pp.\u00a0457\u2013464). Cambridge: MIT."},{"key":"9133_CR15","doi-asserted-by":"crossref","unstructured":"Hirai, K., Hirose, M., & Takenaka, T. (1998). The development of Honda humanoid robot. In Proceedings of the 1998 IEEE international conference on robotics and automation (pp.\u00a0160\u2013165).","DOI":"10.1109\/ROBOT.1998.677288"},{"key":"9133_CR16","doi-asserted-by":"crossref","unstructured":"Howard, M., Klanke, S., Gienger, M., Goerick, C., & Vijayakumar, S. (2009). A novel method for learning policies from variable constraint data. Autonomous Robots (same special issue, Part B).","DOI":"10.1007\/s10514-009-9129-8"},{"issue":"5","key":"9133_CR17","doi-asserted-by":"crossref","first-page":"884","DOI":"10.1109\/TRO.2007.904896","volume":"23","author":"S. Hyon","year":"2007","unstructured":"Hyon, S., Hale, J. G., & Cheng, G. (2007). Full-body compliant human-humanoid interaction: Balancing in the presence of unknown external forces. IEEE Transactions on Robotics, 23(5), 884\u2013898.","journal-title":"IEEE Transactions on Robotics"},{"key":"9133_CR18","first-page":"345","volume-title":"Advances in neural information processing systems 7","author":"T. Jaakkola","year":"1995","unstructured":"Jaakkola, T., Singh, S. P., & Jordan, M. I. (1995). Reinforcement learning algorithm for partially observable Markov decision problems. In G. Tesauro, D. Touretzky & T. Leen (Eds.), Advances in neural information processing systems 7 (pp.\u00a0345\u2013352). Cambridge: MIT."},{"issue":"2","key":"9133_CR19","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1109\/MRA.2007.380655","volume":"14","author":"S. Kajita","year":"2007","unstructured":"Kajita, S., Nagasaki, T., Kaneko, K., & Hirukawa, H. (2007). ZMP-based biped running control. Robotics and Automation Magazine, IEEE, 14(2), 63\u201372.","journal-title":"Robotics and Automation Magazine, IEEE"},{"key":"9133_CR20","first-page":"1531","volume-title":"Advances in neural information processing systems 14","author":"S. Kakade","year":"2002","unstructured":"Kakade, S. (2002). A natural policy gradient. In Advances in neural information processing systems 14 (pp.\u00a01531\u20131536). Cambridge: MIT."},{"key":"9133_CR21","unstructured":"Kimura, H., & Kobayashi, S. (1998). An analysis of actor\/critic algorithms using eligibility traces: reinforcement learning with imperfect value functions. In Proceedings of the 15th int. conf. on machine learning (pp.\u00a0284\u2013292)."},{"key":"9133_CR22","doi-asserted-by":"crossref","unstructured":"Ko, J., & Fox, D. (2009). GP-BayesFilters: Bayesian filtering using Gaussian process prediction and observation models. Autonomous Robots (same special issue, Part A).","DOI":"10.1007\/s10514-009-9119-x"},{"issue":"4","key":"9133_CR23","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V. R. Konda","year":"2003","unstructured":"Konda, V. R., & Tsitsiklis, J. N. (2003). Actor-critic algorithms. SIAM Journal on Control and Optimization, 42(4), 1143\u20131166.","journal-title":"SIAM Journal on Control and Optimization"},{"key":"9133_CR24","unstructured":"Kuvayev, L., & Sutton, R. (1996). Model-based reinforcement learning with an approximate, learned model. In Proceedings of the ninth Yale workshop on adaptive and learning systems (pp.\u00a0101\u2013105)."},{"issue":"11","key":"9133_CR25","doi-asserted-by":"crossref","first-page":"911","DOI":"10.1016\/j.robot.2006.05.012","volume":"54","author":"T. Matsubara","year":"2006","unstructured":"Matsubara, T., Morimoto, J., Nakanishi, J., Sato, M., & Doya, K. (2006). Learning CPG-based biped locomotion with a policy gradient method. Robotics and Autonomous Systems, 54(11), 911\u2013920.","journal-title":"Robotics and Autonomous Systems"},{"issue":"2","key":"9133_CR26","doi-asserted-by":"crossref","first-page":"62","DOI":"10.1177\/027836499000900206","volume":"9","author":"T. McGeer","year":"1990","unstructured":"McGeer, T. (1990). Passive dynamic walking. International Journal of Robotics Research, 9(2), 62\u201382.","journal-title":"International Journal of Robotics Research"},{"key":"9133_CR27","unstructured":"Meuleau, N., Kim, K. E., & Kaelbling, L. P. (2001). Exploration in gradient-based reinforcement learning. Technical report, AI Memo 2001-003, MIT."},{"issue":"2","key":"9133_CR28","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1177\/027836498400300206","volume":"3","author":"H. Miura","year":"1984","unstructured":"Miura, H., & Shimoyama, I. (1984). Dynamical walk of biped locomotion. International Journal of Robotics Research, 3(2), 60\u201374.","journal-title":"International Journal of Robotics Research"},{"key":"9133_CR29","doi-asserted-by":"crossref","unstructured":"Miyazaki, F., & Arimoto, S. (1981). Implementation of a hierarchical control for biped locomotion. In 8th IFAC (pp.\u00a043\u201348).","DOI":"10.1016\/S1474-6670(17)63748-0"},{"issue":"2","key":"9133_CR30","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1109\/MRA.2007.380654","volume":"14","author":"J. Morimoto","year":"2007","unstructured":"Morimoto, J., & Atkeson, C. G. (2007). Learning biped locomotion: application of Poincar\u00e9-map-based reinforcement learning. IEEE Robotics and Automation Magazine, 14(2), 41\u201351.","journal-title":"IEEE Robotics and Automation Magazine"},{"key":"9133_CR31","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1016\/S0921-8890(01)00113-0","volume":"36","author":"J. Morimoto","year":"2001","unstructured":"Morimoto, J., & Doya, K. (2001). Acquisition of stand-up behavior by a real robot using hierarchical reinforcement learning. Robotics and Autonomous Systems, 36, 37\u201351.","journal-title":"Robotics and Autonomous Systems"},{"key":"9133_CR32","doi-asserted-by":"crossref","unstructured":"Morimoto, J., Endo, G., Nakanishi, J., Hyon, S., Cheng, G., Atkeson, C. G., & Bentivegna, D. (2006). Modulation of simple sinusoidal patterns by a coupled oscillator model for biped walking. In Proceedings of the 2006 IEEE international conference on robotics and automation (pp. 1579\u20131584).","DOI":"10.1109\/ROBOT.2006.1641932"},{"issue":"1","key":"9133_CR33","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1109\/TRO.2008.915457","volume":"24","author":"J. Morimoto","year":"2008","unstructured":"Morimoto, J., Endo, G., Nakanish, J., & Cheng, G. (2008). A biologically inspired biped locomotion strategy for humanoid robots: modulation of sinusoidal patterns by a coupled oscillator model. IEEE Transaction on Robotics, 24(1), 185\u2013191.","journal-title":"IEEE Transaction on Robotics"},{"key":"9133_CR34","unstructured":"Nagasaka, K., Inaba, M., & Inoue, H. (1999). Stabilization of dynamic walk on a humanoid using torso position compliance control. In Proceedings of 17th annual conference on robotics society of Japan (pp.\u00a01193\u20131194)."},{"key":"9133_CR35","doi-asserted-by":"crossref","unstructured":"Nagasaka, K., Kuroki, Y., Suzuki, S., Itoh, Y., & Yamaguchi, J. (2004). Integrated motion control for walking, jumping and running on a small bipedal entertainment robot. In Proceedings of IEEE 2004 international conference on robotics and automation (pp.\u00a03189\u20133194). New Orleans, LA, USA.","DOI":"10.1109\/ROBOT.2004.1308745"},{"issue":"4","key":"9133_CR36","doi-asserted-by":"crossref","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J. Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008). Reinforcement learning of motor skills with policy gradients. Neural Networks, 21(4), 682\u2013697.","journal-title":"Neural Networks"},{"issue":"7\u20139","key":"9133_CR37","doi-asserted-by":"crossref","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J. Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008). Natural actor-critic. Neurocomputing, 71(7\u20139), 1180\u20131190.","journal-title":"Neurocomputing"},{"key":"9133_CR38","first-page":"751","volume-title":"Advances in neural information processing systems","author":"C. E. Rasmussen","year":"2004","unstructured":"Rasmussen, C. E., & Kuss, M. (2004). Gaussian processes in reinforcement learning. In Advances in neural information processing systems (vol.\u00a016, pp.\u00a0751\u2013759). Cambridge: MIT."},{"key":"9133_CR39","volume-title":"Gaussian processes for machine learning","author":"C. E. Rasmussen","year":"2006","unstructured":"Rasmussen, C. E., & Williams, C. K. I. (2006). Gaussian processes for machine learning. Cambridge: MIT."},{"key":"9133_CR40","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Gablel, R. H. T., & Lange, S. (2009). Reinforcement learning for robot soccer. Autonomous Robots (same special issue, Part A).","DOI":"10.1007\/s10514-009-9120-4"},{"key":"9133_CR41","doi-asserted-by":"crossref","unstructured":"Shiriaev, A., Robertsson, A., Perram, J., & Sandberg, A. (2005). Periodic motion planning for virtually constrained (hybrid) mechanical systems. In Proceedings of IEEE conference on decision and control (pp.\u00a04035\u20134040).","DOI":"10.1109\/CDC.2005.1582793"},{"key":"9133_CR42","first-page":"619","volume-title":"Advances in neural information processing systems 13","author":"J. Smola","year":"2001","unstructured":"Smola, J., & Bartlett, P. (2001). Sparse greedy Gaussian process regression. In T. G. Diettrich & V. Tresp (Eds.), Advances in neural information processing systems 13 (pp.\u00a0619\u2013625). Cambridge: MIT."},{"key":"9133_CR43","first-page":"1257","volume-title":"Advances in neural information processing systems 18","author":"E. Snelson","year":"2006","unstructured":"Snelson, E., & Ghahramani, Z. (2006). Sparse Gaussian processes using pseudo-inputs. In Y. Weiss, B. Scholkof & J. Platt (Eds.), Advances in neural information processing systems 18 (pp.\u00a01257\u20131264). Cambridge: MIT."},{"key":"9133_CR44","volume-title":"Nonlinear dynamics and chaos","author":"S. H. Strogatz","year":"1994","unstructured":"Strogatz, S. H. (1994). Nonlinear dynamics and chaos. Reading: Addison-Wesley."},{"key":"9133_CR45","unstructured":"Sugihara, T., & Nakamura, Y. (2002). Whole-body cooperative COG control through ZMP manipulation for humanoid robots. In IEEE int. conf. on robotics and automation, Washington DC, USA, 2002."},{"key":"9133_CR46","doi-asserted-by":"crossref","unstructured":"Sugihara, T., & Nakamura, Y. (2005). A fast online gait planning with boundary condition relaxation for humanoid robots. In IEEE int. conf. on robotics and automation (pp.\u00a0306\u2013311). Barcelona, Spain.","DOI":"10.1109\/ROBOT.2005.1570136"},{"key":"9133_CR47","volume-title":"Reinforcement learning: an introduction","author":"R. S. Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: an introduction. Cambridge: MIT."},{"key":"9133_CR48","first-page":"1057","volume-title":"Advances in neural information processing systems 12","author":"R. S. Sutton","year":"2000","unstructured":"Sutton, R. S., McAllester, D., Singh, S., & Mansour, Y. (2000). Policy gradient methods for reinforcement learning with function approximation. In Advances in neural information processing systems 12 (pp.\u00a01057\u20131063). Cambridge: MIT."},{"key":"9133_CR49","doi-asserted-by":"crossref","unstructured":"Tedrake, R., Zhang, T. W., & Seung, H. S. (2004). Stochastic policy gradient reinforcement learning on a simple 3D biped. In Proceedings of the 2004 IEEE\/RSJ international conference on intelligent robots and systems (pp.\u00a02849\u20132854).","DOI":"10.1109\/IROS.2004.1389841"},{"key":"9133_CR50","doi-asserted-by":"crossref","unstructured":"Tsuchiya, K., Aoi, S., & Tsujita, K. (2003). Locomotion control of a biped locomotion robot using nonlinear oscillators. In Proceedings of the IEEE\/RSJ international conference on intelligent robots and systems (pp.\u00a01745\u20131750). Las Vegas, NV, USA.","DOI":"10.1109\/IROS.2003.1248896"},{"issue":"6","key":"9133_CR51","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1177\/0278364904044410","volume":"23","author":"E. R. Westervelt","year":"2004","unstructured":"Westervelt, E. R., Buche, G., & Grizzle, J. W. (2004). Experimental validation of a framework for the design of controllers that induce stable walking in planar bipeds. International Journal of Robotics Research, 23(6), 559\u2013582.","journal-title":"International Journal of Robotics Research"},{"key":"9133_CR52","first-page":"514","volume-title":"Advances in neural information processing systems","author":"C. K. I. Williams","year":"1996","unstructured":"Williams, C. K. I., & Rasmussen, C. E. (1996). Gaussian processes for regression. In Advances in neural information processing systems (vol.\u00a08, pp.\u00a0514\u2013520). Cambridge: MIT."}],"container-title":["Autonomous Robots"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-009-9133-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10514-009-9133-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-009-9133-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T05:38:29Z","timestamp":1710567509000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10514-009-9133-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,8]]},"references-count":52,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2009,8]]}},"alternative-id":["9133"],"URL":"https:\/\/doi.org\/10.1007\/s10514-009-9133-z","relation":{},"ISSN":["0929-5593","1573-7527"],"issn-type":[{"value":"0929-5593","type":"print"},{"value":"1573-7527","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,8]]}}}