{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,3]],"date-time":"2026-05-03T04:51:59Z","timestamp":1777783919339,"version":"3.51.4"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,6,3]],"date-time":"2021-06-03T00:00:00Z","timestamp":1622678400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,6,3]],"date-time":"2021-06-03T00:00:00Z","timestamp":1622678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Robot Syst"],"published-print":{"date-parts":[[2021,7]]},"DOI":"10.1007\/s10846-021-01355-9","type":"journal-article","created":{"date-parts":[[2021,6,3]],"date-time":"2021-06-03T12:03:21Z","timestamp":1622721801000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["Learning Humanoid Robot Running Motions with Symmetry Incentive through Proximal Policy Optimization"],"prefix":"10.1007","volume":"102","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2599-6265","authenticated-orcid":false,"given":"Luckeciano C.","family":"Melo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1263-5110","authenticated-orcid":false,"given":"Dicksiano C.","family":"Melo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2944-4476","authenticated-orcid":false,"given":"Marcos R. O. A.","family":"Maximo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,3]]},"reference":[{"issue":"1","key":"1355_CR1","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1609\/aimag.v18i1.1276","volume":"18","author":"H Kitano","year":"1997","unstructured":"Kitano, H., Asada, M., Kuniyoshi, Y., Noda, I., Osawa, E., Matsubara, H.: Robocup: A challenge problem for ai. AI Mag. 18(1), 73 (1997). https:\/\/doi.org\/10.1609\/aimag.v18i1.1276, https:\/\/aaai.org\/ojs\/index.php\/aimagazine\/article\/view\/1276","journal-title":"AI Mag."},{"key":"1355_CR2","doi-asserted-by":"crossref","unstructured":"Gouaillier, D., Hugel, V., Blazevic, P., Kilner, C., Monceaux, J., Lafourcade, P., Marnier, B., Serre, J., Maisonnier, B.: Mechatronic design of nao humanoid. In: 2009 IEEE International conference on robotics and automation, pp. 769\u2013774 (2009)","DOI":"10.1109\/ROBOT.2009.5152516"},{"key":"1355_CR3","unstructured":"Melo, L.C., Maximo, M.R.O.A., da Cunha, A.M.: Learning humanoid robot motions through deep neural networks. In: Proceedings of the II brazilian humanoid robot workshop (BRAHUR) and II brazilian workshop on service robotics (BRASERO), pp. 74\u201379. https:\/\/fei.edu.br\/brahurbrasero2019\/Proceedings_BRAHUR_BRASERO_2019.pdf (2019)"},{"issue":"3","key":"1355_CR4","doi-asserted-by":"publisher","first-page":"172988141667513","DOI":"10.1177\/1729881416675135","volume":"14","author":"MROA Maximo","year":"2017","unstructured":"Maximo, M.R.O.A., Colombini, E.L., Ribeiro, C.H.: Stable and fast model-free walk with arms movement for humanoid robots. Int. J. Adv. Robot. Syst. 14 (3), 1729881416675135 (2017). https:\/\/doi.org\/10.1177\/1729881416675135","journal-title":"Int. J. Adv. Robot. Syst."},{"key":"1355_CR5","unstructured":"Farchy, A., Barrett, S., MacAlpine, P., Stone, P.: Humanoid robots learning to walk faster: From the real world to simulation and back. In: Proc. of 12th Int. Conf. on autonomous agents and multiagent systems (AAMAS) (2013)"},{"key":"1355_CR6","doi-asserted-by":"crossref","unstructured":"Kuindersma, S., Permenter, F., Tedrake, R.: An Efficiently Solvable Quadratic Program for Stabilizing Dynamic Locomotion. In: Proceedings of the 2014 IEEE International Conference on Robotics and Automation (ICRA). IEEE, Hong Kong, China (2014)","DOI":"10.1109\/ICRA.2014.6907230"},{"key":"1355_CR7","unstructured":"Kajita, S., Kanehiro, F., Kaneko, K., Yokoi, K., Hirukawa, H.: The 3D Linear Inverted Pendulum Mode: A simple modeling for a biped walking pattern generation. In: Proceedings of the 2001 IEEE\/RSJ International Conference on Intelligent Robots and Systems. IEEE, Hawaii, USA (2001)"},{"key":"1355_CR8","first-page":"1082","volume":"307","author":"S Collins","year":"2005","unstructured":"Collins, S., Ruina, A., Tedrake, R., Wisse, M.: Efficient bipedal robots based on passive dynamic walkers. Science Magazine 307, 1082\u20131085 (2005)","journal-title":"Science Magazine"},{"key":"1355_CR9","doi-asserted-by":"crossref","unstructured":"Muniz, F., Maximo, M.R.O.A., Ribeiro, C.H.C.: Keyframe movement optimization for simulated humanoid robot using a parallel optimization framework. In: 2016 XIII Latin American Robotics Symposium and IV Brazilian Robotics Symposium (LARS\/SBR), pp. 79\u201384 (2016)","DOI":"10.1109\/LARS-SBR.2016.20"},{"key":"1355_CR10","unstructured":"Fischer, J., Dorer, K.: Learning a walk behavior utilizing toes from scratch. https:\/\/archive.robocup.info\/Soccer\/Simulation\/3D\/FCPs\/RoboCup\/2019\/magmaOffenburg_SS3D_RC2019_FCP.pdf (2019)"},{"key":"1355_CR11","doi-asserted-by":"crossref","unstructured":"Abreu, M., Simes, D., Lau, N., Reis, L.P.: Fast, human-like running and sprinting. https:\/\/archive.robocup.info\/Soccer\/Simulation\/3D\/FCPs\/RoboCup\/2019\/FCPortugal_SS3D_RC2019_FCP.pdf (2019)","DOI":"10.1007\/978-3-030-35699-6_1"},{"key":"1355_CR12","doi-asserted-by":"crossref","unstructured":"Abrel, M., Reis, L.P., Lau, N.: Learning to run faster in a humanoid robot soccer environment through reinforcement learning. In: Proceedings of the 2019 RoboCup symposium. RoboCup, Sydney, Australia (2019)","DOI":"10.1007\/978-3-030-35699-6_1"},{"key":"1355_CR13","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. CoRR abs\/1707.06347. arXiv:1707.06347 (2017)"},{"key":"1355_CR14","doi-asserted-by":"crossref","unstructured":"Abdolhosseini, F., Ling, H.Y., Xie, Z., Peng, X., Panne, M.V.D.: On learning symmetric locomotion. Motion, Interaction and Games (2019)","DOI":"10.1145\/3359566.3360070"},{"key":"1355_CR15","doi-asserted-by":"crossref","unstructured":"Carvalho Melo, L., Omena Albuquerque M\u00e1ximo, M.R.: Learning humanoid robot running skills through proximal policy optimization. In: 2019 Latin american robotics symposium (LARS), 2019 Brazilian symposium on robotics (SBR) and 2019 workshop on robotics in education (WRE), pp. 37\u201342 (2019)","DOI":"10.1109\/LARS-SBR-WRE48964.2019.00015"},{"key":"1355_CR16","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction , 2nd edn. The MIT Press, Cambridge (2018). http:\/\/incompleteideas.net\/book\/the-book-2nd.htmlhttp:\/\/incompleteideas.net\/book\/the-book-2nd.html","edition":"2nd edn."},{"key":"1355_CR17","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M.I., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. CoRR abs\/1506.02438. arXiv:1506.02438 (2015)"},{"key":"1355_CR18","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M.I., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. In: Bengio, Y, LeCun, Y (eds.) 4th International conference on learning representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings. arXiv:1506.02438 (2016)"},{"key":"1355_CR19","volume-title":"Openai baselines","author":"P Dhariwal","year":"2017","unstructured":"Dhariwal, P., Hesse, C., Klimov, O., Nichol, A., Plappert, M., Radford, A., Schulman, J., Sidor, S., Wu, Y., Zhokhov, P.: Openai baselines. GitHub, San Francisco (2017). https:\/\/github.com\/openai\/baselineshttps:\/\/github.com\/openai\/baselines"},{"key":"1355_CR20","unstructured":"Melo, L.C., Maximo, M.R.O.A., da Cunha, A.M.: Bottom-up meta-policy search. In: Proceedings of the deep reinforcement learning workshop of NeurIPS 2019 (2019)"},{"key":"1355_CR21","doi-asserted-by":"crossref","unstructured":"Carvalho Melo, D, Quartucci Forster, C H, Omena de Albuquerque Mximo, M R: Learning when to kick through deep neural networks. In: 2019 Latin american robotics symposium (LARS), 2019 Brazilian symposium on robotics (SBR) and 2019 workshop on robotics in education (WRE), pp. 43\u201348 (2019)","DOI":"10.1109\/LARS-SBR-WRE48964.2019.00016"},{"key":"1355_CR22","doi-asserted-by":"crossref","unstructured":"MacAlpine, P., Collins, N., Lopez-Mobilia, A., Stone, P.: Ut austin villa: Robocup 2012 3d simulation league champion. In: Chen, X., Stone, P., Sucar, L.E., van der Zant, T. (eds.) RoboCup 2012: Robot soccer world cup XVI, pp. 77\u201388. Springer Berlin Heidelberg, Berlin, Heidelberg (2013)","DOI":"10.1007\/978-3-642-39250-4_8"},{"key":"1355_CR23","doi-asserted-by":"crossref","unstructured":"Abdolmaleki, A., Sim\u00f5es, D, Lau, N., Reis, L.P., Neumann, G.: Learning a humanoid kick with controlled distance. In: Behnke, S., Sheh, R., Sar\u0131el, S., Lee, D.D. (eds.) RoboCup 2016: Robot world cup XX, pp. 45\u201357. Springer International Publishing, Cham (2017)","DOI":"10.1007\/978-3-319-68792-6_4"},{"key":"1355_CR24","doi-asserted-by":"crossref","unstructured":"Depinet, M., MacAlpine, P., Stone, P.: Keyframe sampling, optimization, and behavior integration: Towards long-distance kicking in the robocup 3d simulation league. In: Bianchi, R.A.C., Akin, H.L., Ramamoorthy, S., Sugiura, K. (eds.) RoboCup-2014: Robot soccer world cup XVIII, Lecture Notes in Artificial Intelligence. Springer Verlag. Berlin (2015)","DOI":"10.1007\/978-3-319-18615-3_47"},{"key":"1355_CR25","doi-asserted-by":"crossref","unstructured":"MacAlpine, P., Stone, P.: UT Austin Villa: RoboCup 2017 3D simulation league competition and technical challenges champions. In: Sammut, C., Obst, O., Tonidandel, F., Akyama, H. (eds.) RoboCup 2017: Robot soccer world cup XXI. Lecture Notes in Artificial Intelligence, Springer (2018)","DOI":"10.1007\/978-3-030-00308-1_39"},{"key":"1355_CR26","unstructured":"Urieli, D., MacAlpine, P., Kalyanakrishnan, S., Bentor, Y., Stone, P.: On optimizing interdependent skills: A case study in simulated 3d humanoid robot soccer. In: Tumer, K., Yolum, P., Sonenberg, L., Stone, P. (eds.) Proc. of 10th Int. Conf. on autonomous agents and multiagent systems (AAMAS), vol. 2, pp. 769\u2013776. IFAAMAS (2011)"},{"key":"1355_CR27","unstructured":"MacAlpine, P., Barrett, S., Urieli, D., Vu, V., Stone, P.: Design and optimization of an omnidirectional humanoid walk: A winning approach at the RoboCup 2011 3D simulation competition. In: Proceedings of the twenty-sixth AAAI conference on artificial intelligence (AAAI) (2012)"},{"key":"1355_CR28","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.artint.2017.09.001","volume":"254","author":"P MacAlpine","year":"2018","unstructured":"MacAlpine, P., Stone, P.: Overlapping layered learning. Artif. Intell. 254, 21\u201343 (2018). https:\/\/doi.org\/10.1016\/j.artint.2017.09.001. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0004370217301066","journal-title":"Artif. Intell."},{"key":"1355_CR29","doi-asserted-by":"crossref","unstructured":"Dorer, K.: Learning to use toes in a humanoid robot. In: Akiyama, H., Obst, O., Sammut, C., Tonidandel, F. (eds.) RoboCup 2017: Robot world cup XXI, pp. 168\u2013179. Springer International Publishing, Cham (2018)","DOI":"10.1007\/978-3-030-00308-1_14"},{"key":"1355_CR30","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Playing atari with deepreinforcement learning. arXiv:1312.5602, Cite arxiv:1312.5602Comment: NIPS Deep Learning Workshop 2013 (2013)"},{"key":"1355_CR31","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Harley, T., Lillicrap, T.P., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33rd international conference on international conference on machine learning - Volume 48, ICML\u201916, pp. 1928\u20131937. JMLR.org (2016)"},{"key":"1355_CR32","unstructured":"Wang, Z., Bapst, V., Heess, N., Mnih, V., Munos, R., Kavukcuoglu, K., de Freitas, N.: Sample efficient actor-critic with experience replay. arXiv:1611.01224 (2016)"},{"key":"1355_CR33","unstructured":"Schulman, J., Levine, S., Moritz, P., Jordan, M.I., Abbeel, P.: Trust region policy optimization (2017)"},{"key":"1355_CR34","unstructured":"Heess, N., TB, D., S, S., Lemmon, J., Merel, J., Wayne, G., Tassa, Y., Erez, T., Wang, Z., Eslami, S.M.A., Riedmiller, M., Silver, D.: Emergence of locomotion behaviours in rich environments. arXiv (20 17)"},{"issue":"4","key":"1355_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3197517.3201311","volume":"37","author":"XB Peng","year":"2018","unstructured":"Peng, X.B., Abbeel, P., Levine, S., van de Panne, M.: Deepmimic: Example-guided deep reinforcement learning of physics-based character skills. ACM Trans. Graph. 37(4), 1\u201314 (2018). https:\/\/doi.org\/10.1145\/3197517.3201311","journal-title":"ACM Trans. Graph."},{"key":"1355_CR36","unstructured":"Melo, L.C.: Imitation learning and meta-learning for optimizing humanoid robot motions. Master\u2019s Thesis, Instituto Tecnol\u00f3gico de Aeron\u00e1utica (2019)"},{"key":"1355_CR37","volume-title":"Simspark","author":"H Vatankhah","year":"2018","unstructured":"Vatankhah, H., Lau, N., MacAlpine, P., van Dijk, S., Glaser, S.: Simspark. Gitlab, San Francisco (2018). https:\/\/gitlab.com\/robocup-sim\/SimSpark"},{"key":"1355_CR38","unstructured":"Maximo, M.R.O.A., Ribeiro, C.H.C.: ZMP-based humanoid walking engine with arms movement and stabilization. In: Proceedings of the 2016 Congresso Brasileiro de Autom\u00e1tica (CBA). SBA, Vit\u00f3ria, ES, Brazil (2016)"},{"key":"1355_CR39","doi-asserted-by":"crossref","unstructured":"Xu, Y., Vatankhah, H.: Simspark: An open source robot simulator developed by the robocup community. In: Behnke, S., Veloso, M., Visser, A., Xiong, R. (eds.) RoboCup 2013: Robot world cup XVII, pp. 632\u2013639. Springer Berlin Heidelberg, Berlin, Heidelberg (2014)","DOI":"10.1007\/978-3-662-44468-9_59"},{"key":"1355_CR40","doi-asserted-by":"crossref","unstructured":"MacAlpine, P., Collins, N., Lopez-Mobilia, A., Stone, P.: UT Austin Villa: RoboCup 2012 3D simulation league champion. In: Chen, X., Stone, P., Sucar, L.E., der Zant, T.V. (eds.) RoboCup-2012: Robot soccer world cup XVI, Lecture notes in artificial intelligence. Springer Verlag, Berlin (2013)","DOI":"10.1007\/978-3-642-39250-4_8"},{"key":"1355_CR41","unstructured":"Intel: Intel devcloud. https:\/\/software.intel.com\/en-us\/ai-academy\/devcloud (2018)"},{"key":"1355_CR42","unstructured":"et al, M.A.: TensorFlow: Large-scale machine learning on heterogeneous systems. https:\/\/www.tensorflow.org\/, Software available from tensorflow.org (2015)"}],"container-title":["Journal of Intelligent &amp; Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-021-01355-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10846-021-01355-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-021-01355-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,29]],"date-time":"2022-12-29T16:32:24Z","timestamp":1672331544000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10846-021-01355-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,3]]},"references-count":42,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,7]]}},"alternative-id":["1355"],"URL":"https:\/\/doi.org\/10.1007\/s10846-021-01355-9","relation":{},"ISSN":["0921-0296","1573-0409"],"issn-type":[{"value":"0921-0296","type":"print"},{"value":"1573-0409","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,3]]},"assertion":[{"value":"16 October 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 February 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 June 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of Interests"}}],"article-number":"54"}}