{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T09:05:21Z","timestamp":1780563921383,"version":"3.54.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,2,22]],"date-time":"2024-02-22T00:00:00Z","timestamp":1708560000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,22]],"date-time":"2024-02-22T00:00:00Z","timestamp":1708560000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s11633-023-1429-5","type":"journal-article","created":{"date-parts":[[2024,2,22]],"date-time":"2024-02-22T06:11:27Z","timestamp":1708582287000},"page":"1162-1177","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Towards Jumping Skill Learning by Target-guided Policy Optimization for Quadruped 
Robots"],"prefix":"10.1007","volume":"21","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5527-3362","authenticated-orcid":false,"given":"Chi","family":"Zhang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4215-5361","authenticated-orcid":false,"given":"Wei","family":"Zou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7504-4097","authenticated-orcid":false,"given":"Ningbo","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7869-0826","authenticated-orcid":false,"given":"Shuomo","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,2,22]]},"reference":[{"issue":"10","key":"1429_CR1","doi-asserted-by":"publisher","first-page":"1894","DOI":"10.1242\/jeb.144279","volume":"220","author":"C T Richards","year":"2017","unstructured":"C. T. Richards, L. B. Porro, A. J. Collings. Kinematic control of extreme jump angles in the red-legged running frog. Kassina maculata. Journal of Experimental Biology, vol.220, no. 10, pp. 1894\u20131904, 2017. DOI: https:\/\/doi.org\/10.1242\/jeb.144279.","journal-title":"Journal of Experimental Biology"},{"issue":"5","key":"1429_CR2","doi-asserted-by":"publisher","first-page":"2307","DOI":"10.1109\/TMECH.0016.5727720","volume":"21","author":"J Z Yu","year":"2016","unstructured":"J. Z. Yu, Z. S. Su, Z. X. Wu, M. Tan. Development of a fast-swimming dolphin robot capable of leaping. IEEE\/ASME Transactions on Mechatronics, vol.21, no. 5, pp. 2307\u20132316, 2016. 
DOI: https:\/\/doi.org\/10.1109\/TMECH.0016.5727720.","journal-title":"IEEE\/ASME Transactions on Mechatronics"},{"issue":"1","key":"1429_CR3","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1007\/s10514-016-9573-1","volume":"41","author":"M Focchi","year":"2017","unstructured":"M. Focchi, A. Del Prete, I. Havoutis, R. Featherstone, D. G. Caldwell, C. Semini. High-slope terrain locomotion for torque-controlled quadruped robots. Autonomous Robots, vol.41, no. 1, pp. 259\u2013272, 2017. DOI: https:\/\/doi.org\/10.1007\/s10514-016-9573-1.","journal-title":"Autonomous Robots"},{"key":"1429_CR4","doi-asserted-by":"publisher","first-page":"1859","DOI":"10.1109\/IROS.2012.6385865","volume-title":"Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"M Rutschmann","year":"2012","unstructured":"M. Rutschmann, B. Satzinger, M. Byl, K. Byl. Nonlinear model predictive control for rough-terrain robot hopping. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, Vilamoura-Algarve, Portugal, pp. 1859\u20131864, 2012. DOI: https:\/\/doi.org\/10.1109\/IROS.2012.6385865."},{"key":"1429_CR5","doi-asserted-by":"publisher","first-page":"7440","DOI":"10.1109\/IROS.2018.8594448","volume-title":"Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"J Di Carlo","year":"2018","unstructured":"J. Di Carlo, P. M. Wensing, B. Katz, G. Bledt, S. Kim. Dynamic locomotion in the MIT cheetah 3 through convex model-predictive control. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, Madrid, Spain, pp. 7440\u20137447, 2018. DOI: https:\/\/doi.org\/10.1109\/IROS.2018.8594448."},{"issue":"2","key":"1429_CR6","doi-asserted-by":"publisher","first-page":"972","DOI":"10.1109\/TASE.2018.2882764","volume":"16","author":"M M G Ardakani","year":"2019","unstructured":"M. M. G. Ardakani, B. Olofsson, A. Robertsson, R. Johansson. 
Model predictive control for real-time point-to-point trajectory generation. IEEE Transactions on Automation Science and Engineering, vol.16, no. 2, pp. 972\u2013983, 2019. DOI: https:\/\/doi.org\/10.1109\/TASE.2018.2882764.","journal-title":"IEEE Transactions on Automation Science and Engineering"},{"key":"1429_CR7","doi-asserted-by":"publisher","first-page":"3378","DOI":"10.1109\/IROS.2003.1249678","volume-title":"Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"F Kikuchi","year":"2003","unstructured":"F. Kikuchi, Y. Ota, S. Hirose. Basic performance experiments for jumping quadruped. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, Las Vegas, USA, pp. 3378\u20133383, 2003. DOI: https:\/\/doi.org\/10.1109\/IROS.2003.1249678."},{"key":"1429_CR8","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1109\/IROS.2010.5652928","volume-title":"Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"A Yamada","year":"2010","unstructured":"A. Yamada, H. Mameda, H. Mochiyama, H. Fujimoto. A compact jumping robot utilizing snap-through buckling with bend and twist. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, Taipei, China, pp. 389\u2013394, 2010. DOI: https:\/\/doi.org\/10.1109\/IROS.2010.5652928."},{"issue":"1","key":"1429_CR9","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1109\/MRA.2015.2505910","volume":"23","author":"C Gehring","year":"2016","unstructured":"C. Gehring, S. Coros, M. Hutter, C. D. Bellicoso, H. Heijnen, R. Diethelm, M. Bloesch, P. Fankhauser, J. Hwangbo, M. Hoepflinger, R. Siegwart. Practice makes perfect: An optimization-based approach to controlling agile motions for a quadruped robot. IEEE Robotics & Automation Magazine, vol.23, no. 1, pp. 34\u201343, 2016. 
DOI: https:\/\/doi.org\/10.1109\/MRA.2015.2505910.","journal-title":"IEEE Robotics & Automation Magazine"},{"key":"1429_CR10","doi-asserted-by":"publisher","unstructured":"J. Zhong, J. Z. Fan, J. Zhao, W. Zhang. Kinematic analysis of jumping leg driven by artificial muscles. In Proceedings of IEEE International Conference on Mechatronics and Automation, Chengdu, China, pp. 1004\u20131008, 2012. DOI: https:\/\/doi.org\/10.1109\/ICMA.2012.6283387.","DOI":"10.1109\/ICMA.2012.6283387"},{"key":"1429_CR11","doi-asserted-by":"publisher","unstructured":"Y. K. Zhu, R. Mottaghi, E. Kolve, J. J. Lim, A. Gupta, L. Fei-Fei, A. Farhadi. Target-driven visual navigation in indoor scenes using deep reinforcement learning. In Proceedings of IEEE International Conference on Robotics and Automation, Singapore, pp. 3357\u20133364, 2017. DOI: https:\/\/doi.org\/10.1109\/ICRA.2017.7989381.","DOI":"10.1109\/ICRA.2017.7989381"},{"issue":"4","key":"1429_CR12","doi-asserted-by":"publisher","first-page":"2393","DOI":"10.1109\/TII.2019.2936167","volume":"16","author":"H B Shi","year":"2020","unstructured":"H. B. Shi, L. Shi, M. Xu, K. S. Hwang. End-to-end navigation strategy with deep reinforcement learning for mobile robots. IEEE Transactions on Industrial Informatics, vol.16, no. 4, pp. 2393\u20132402, 2020. DOI: https:\/\/doi.org\/10.1109\/TII.2019.2936167.","journal-title":"IEEE Transactions on Industrial Informatics"},{"issue":"11","key":"1429_CR13","doi-asserted-by":"publisher","first-page":"5174","DOI":"10.1109\/TNNLS.2018.2805379","volume":"29","author":"Z Y Yang","year":"2018","unstructured":"Z. Y. Yang, K. Merrick, L. W. Jin, H. A. Abbass. Hierarchical deep reinforcement learning for continuous action control. IEEE Transactions on Neural Networks and Learning Systems, vol.29, no. 11, pp. 5174\u20135184, 2018. 
DOI: https:\/\/doi.org\/10.1109\/TNNLS.2018.2805379.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"issue":"2","key":"1429_CR14","doi-asserted-by":"publisher","first-page":"1549","DOI":"10.1109\/LRA.2019.2896467","volume":"4","author":"M Breyer","year":"2019","unstructured":"M. Breyer, F. Furrer, T. Novkovic, R. Siegwart, J. Nieto. Comparing task simplifications to learn closed-loop object picking using deep reinforcement learning. IEEE Robotics and Automation Letters, vol.4, no. 2, pp. 1549\u20131556, 2019. DOI: https:\/\/doi.org\/10.1109\/LRA.2019.2896467.","journal-title":"IEEE Robotics and Automation Letters"},{"issue":"1","key":"1429_CR15","doi-asserted-by":"publisher","first-page":"1117","DOI":"10.1109\/TVT.2019.2952549","volume":"69","author":"H J Huang","year":"2020","unstructured":"H. J. Huang, Y. C. Yang, H. Wang, Z. G. Ding, H. Sari, F. Adachi. Deep reinforcement learning for UAV navigation through massive MIMO technique. IEEE Transactions on Vehicular Technology, vol.69, no. 1, pp. 1117\u20131121, 2020. DOI: https:\/\/doi.org\/10.1109\/TVT.2019.2952549.","journal-title":"IEEE Transactions on Vehicular Technology"},{"key":"1429_CR16","doi-asserted-by":"publisher","unstructured":"J. Xu, T. Du, M. Foshey, B. C. Li, B. Zhu, A. Schulz, W. Matusik. Learning to fly: Computational controller design for hybrid UAVs with reinforcement learning. ACM Transactions on Graphics, vol. 38, no. 4, Article number 42, 2019. DOI: https:\/\/doi.org\/10.1145\/3306346.3322940.","DOI":"10.1145\/3306346.3322940"},{"issue":"7553","key":"1429_CR17","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1038\/nature14422","volume":"521","author":"A Cully","year":"2015","unstructured":"A. Cully, J. Clune, D. Tarapore, J. B. Mouret. Robots that can adapt like animals. Nature, vol. 521, no. 7553, pp. 503\u2013507, 2015. 
DOI: https:\/\/doi.org\/10.1038\/nature14422.","journal-title":"Nature"},{"key":"1429_CR18","doi-asserted-by":"publisher","unstructured":"J. Tan, T. N. Zhang, E. Coumans, A. Iscen, Y. F. Bai, D. Hafner, S. Bohez, V. Vanhoucke. Sim-to-real: Learning agile locomotion for quadruped robots. In Proceedings of the 14th Robotics: Science and Systems, Pittsburgh, USA, 2018. DOI: https:\/\/doi.org\/10.15607\/RSS.2018.XIV.010.","DOI":"10.15607\/RSS.2018.XIV.010"},{"key":"1429_CR19","doi-asserted-by":"publisher","first-page":"7434","DOI":"10.1109\/ICRA.2019.8794179","volume-title":"Proceedings of International Conference on Robotics and Automation","author":"A Singla","year":"2019","unstructured":"A. Singla, S. Bhattacharya, D. Dholakiya, S. Bhatnagar, A. Ghosal, B. Amrutur, S. Kolathaya. Realizing learned quadruped locomotion behaviors through kinematic motion primitives. In Proceedings of International Conference on Robotics and Automation, IEEE, Montreal, Canada, pp. 7434\u20137440, 2019. DOI: https:\/\/doi.org\/10.1109\/ICRA.2019.8794179."},{"key":"1429_CR20","doi-asserted-by":"publisher","unstructured":"P. X. Long, T. X. Fan, X. Y. Liao, W. X. Liu, H. Zhang, J. Pan. Towards optimally decentralized multi-robot collision avoidance via deep reinforcement learning. In Proceedings of IEEE International Conference on Robotics and Automation, Brisbane, Australia, pp. 6252\u20136259, 2018. DOI: https:\/\/doi.org\/10.1109\/ICRA.2018.8461113.","DOI":"10.1109\/ICRA.2018.8461113"},{"key":"1429_CR21","doi-asserted-by":"publisher","unstructured":"T. Haarnoja, S. Ha, A. Zhou, J. Tan, G. Tucker, S. Levine. Learning to walk via deep reinforcement learning. In Proceedings of the 15th Robotics: Science and Systems, Freiburg im Breisgau, Germany, 2019. DOI: https:\/\/doi.org\/10.15607\/RSS.2019.XV.011.","DOI":"10.15607\/RSS.2019.XV.011"},{"key":"1429_CR22","unstructured":"J. Schulman, F. Wolski, P. Dhariwal, A. Radford, O. Klimov. Proximal policy optimization algorithms. 
[Online], Available: https:\/\/arxiv.org\/abs\/1707.06347, 2017."},{"key":"1429_CR23","doi-asserted-by":"publisher","unstructured":"Q. Nguyen, M. J. Powell, B. Katz, J. Di Carlo, S. Kim. Optimized jumping on the MIT cheetah 3 robot. In Proceedings of International Conference on Robotics and Automation, Montreal, Canada, pp. 7448\u20137454, 2019. DOI: https:\/\/doi.org\/10.1109\/ICRA.2019.8794449.","DOI":"10.1109\/ICRA.2019.8794449"},{"key":"1429_CR24","unstructured":"G. Bellegarda, Q. Nguyen. Robust quadruped jumping via deep reinforcement learning. [Online], Available: https:\/\/arxiv.org\/abs\/2011.07089, 2020."},{"issue":"1","key":"1429_CR25","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1109\/TRO.2021.3084374","volume":"38","author":"N Rudin","year":"2022","unstructured":"N. Rudin, H. Kolvenbach, V. Tsounis, M. Hutter. Cat-like jumping and landing of legged robots in low gravity using deep reinforcement learning. IEEE Transactions on Robotics, vol.38, no. 1, pp. 317\u2013328, 2022. DOI: https:\/\/doi.org\/10.1109\/TRO.2021.3084374.","journal-title":"IEEE Transactions on Robotics"},{"issue":"2","key":"1429_CR26","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1177\/0278364917694244","volume":"36","author":"H W Park","year":"2017","unstructured":"H. W. Park, P. M. Wensing, S. Kim. High-speed bounding with the MIT Cheetah 2: Control design and experiments. The International Journal of Robotics Research, vol.36, no. 2, pp. 167\u2013192, 2017. DOI: https:\/\/doi.org\/10.1177\/0278364917694244.","journal-title":"The International Journal of Robotics Research"},{"issue":"3","key":"1429_CR27","doi-asserted-by":"publisher","first-page":"947","DOI":"10.1109\/TMECH.2019.2907743","volume":"24","author":"G P Jung","year":"2019","unstructured":"G. P. Jung, C. S. Casarez, J. Lee, S. M. Baek, S. J. Yim, S. H. Chae, R. S. Fearing, K. J. Cho. JumpRoACH: A trajectory-adjustable integrated jumping-crawling robot. 
IEEE\/ASME Transactions on Mechatronics, vol.24, no. 3, pp. 947\u2013958, 2019. DOI: https:\/\/doi.org\/10.1109\/TMECH.2019.2907743.","journal-title":"IEEE\/ASME Transactions on Mechatronics"},{"key":"1429_CR28","doi-asserted-by":"publisher","unstructured":"B. Ugurlu, K. Kotaka, T. Narikiyo. Actively-compliant locomotion control on rough terrain: Cyclic jumping and trotting experiments on a stiff-by-nature quadruped. In Proceedings of IEEE International Conference on Robotics and Automation, Karlsruhe, Germany, pp. 3313\u20133320, 2013. DOI: https:\/\/doi.org\/10.1109\/ICRA.2013.6631039.","DOI":"10.1109\/ICRA.2013.6631039"},{"key":"1429_CR29","doi-asserted-by":"publisher","unstructured":"H. W. Park, P. M. Wensing, S. Kim. Online planning for autonomous running jumps over obstacles in high-speed quadrupeds. In Proceedings of Robotics: Science and Systems, Roma, Italy, 2015. DOI: https:\/\/doi.org\/10.15607\/RSS.2015.XI.047.","DOI":"10.15607\/RSS.2015.XI.047"},{"issue":"1","key":"1429_CR30","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/S1672-6529(11)60094-2","volume":"9","author":"T T Wang","year":"2012","unstructured":"T. T. Wang, W. Guo, M. T. Li, F. S. Zha, L. N. Sun. CPG control for biped hopping robot in unpredictable environment. Journal of Bionic Engineering, vol.9, no. 1, pp. 29\u201338, 2012. DOI: https:\/\/doi.org\/10.1016\/S1672-6529(11)60094-2.","journal-title":"Journal of Bionic Engineering"},{"issue":"3","key":"1429_CR31","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1109\/TNNLS.2013.2280596","volume":"25","author":"J Z Yu","year":"2014","unstructured":"J. Z. Yu, M. Tan, J. Chen, J. W. Zhang. A survey on CPG-inspired control models and system implementation. IEEE Transactions on Neural Networks and Learning Systems, vol.25, no. 3, pp. 441\u2013456, 2014. DOI: https:\/\/doi.org\/10.1109\/TNNLS.2013.2280596.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"1429_CR32","unstructured":"N. 
Heess, D. TB, S. Sriram, J. Lemmon, J. Merel, G. Wayne, Y. Tassa, T. Erez, Z. Y. Wang, S. M. A. Eslami, M. Riedmiller, D. Silver. Emergence of locomotion behaviours in rich environments. [Online], Available: https:\/\/arxiv.org\/abs\/1707.02286, 2017."},{"key":"1429_CR33","doi-asserted-by":"publisher","unstructured":"X. B. Peng, G. Berseth, M. Van De Panne. Terrain-adaptive locomotion skills using deep reinforcement learning. ACM Transactions on Graphics, vol. 35, no. 4, Article number 81, 2016. DOI: https:\/\/doi.org\/10.1145\/2897824.2925881.","DOI":"10.1145\/2897824.2925881"},{"key":"1429_CR34","doi-asserted-by":"publisher","first-page":"4238","DOI":"10.1109\/IROS.2018.8593986","volume-title":"Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"A Zeng","year":"2018","unstructured":"A. Zeng, S. R. Song, S. Welker, J. Lee, A. Rodriguez, T. Funkhouser. Learning synergies between pushing and grasping with self-supervised deep reinforcement learning. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, Madrid, Spain, pp. 4238\u20134245, 2018. DOI: https:\/\/doi.org\/10.1109\/IROS.2018.8593986."},{"key":"1429_CR35","doi-asserted-by":"publisher","unstructured":"J. Hwangbo, J. Lee, A. Dosovitskiy, D. Bellicoso, V. Tsounis, V. Koltun, M. Hutter. Learning agile and dynamic motor skills for legged robots. Science Robotics, vol. 4, no. 26, Article number eaau5872, 2019. DOI: https:\/\/doi.org\/10.1126\/scirobotics.aau5872.","DOI":"10.1126\/scirobotics.aau5872"},{"key":"1429_CR36","doi-asserted-by":"crossref","unstructured":"X. B. Peng, E. Coumans, T. N. Zhang, T. W. E. Lee, J. Tan, S. Levine. Learning agile robotic locomotion skills by imitating animals. 
In Proceedings of the 14th Robotics: Science and Systems, Corvalis, USA, 2020.","DOI":"10.15607\/RSS.2020.XVI.064"},{"issue":"3","key":"1429_CR37","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1007\/s11633-021-1290-3","volume":"18","author":"Y Li","year":"2021","unstructured":"Y. Li, D. Xu. Skill learning for robotic insertion based on one-shot demonstration and reinforcement learning. International Journal of Automation and Computing, vol.18, no. 3, pp. 457\u2013467, 2021. DOI: https:\/\/doi.org\/10.1007\/s11633-021-1290-3.","journal-title":"International Journal of Automation and Computing"},{"key":"1429_CR38","doi-asserted-by":"publisher","first-page":"1241","DOI":"10.1109\/IROS.2018.8593722","volume-title":"Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"Z M Xie","year":"2018","unstructured":"Z. M. Xie, G. Berseth, P. Clary, J. Hurst, M. Van De Panne. Feedback control for Cassie with deep reinforcement learning. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, Madrid, Spain, pp. 1241\u20131246, 2018. DOI: https:\/\/doi.org\/10.1109\/IROS.2018.8593722."},{"key":"1429_CR39","doi-asserted-by":"publisher","unstructured":"D. O. Won, K. R. M\u00fcller, S. W. Lee. An adaptive deep reinforcement learning framework enables curling robots with human-like performance in real-world conditions. Science Robotics, vol. 5, no. 46, Article number eabb9764, 2020. DOI: https:\/\/doi.org\/10.1126\/scirobotics.abb9764.","DOI":"10.1126\/scirobotics.abb9764"},{"issue":"2","key":"1429_CR40","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1007\/s11633-022-1314-7","volume":"19","author":"Q L Dang","year":"2022","unstructured":"Q. L. Dang, W. Xu, Y. F. Yuan. A dynamic resource allocation strategy with reinforcement learning for multimodal multi-objective optimization. Machine Intelligence Research, vol.19, no. 2, pp. 138\u2013152, 2022. 
DOI: https:\/\/doi.org\/10.1007\/s11633-022-1314-7.","journal-title":"Machine Intelligence Research"},{"issue":"3","key":"1429_CR41","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1007\/s11633-020-1229-0","volume":"17","author":"Z Li","year":"2020","unstructured":"Z. Li, S. R. Xue, X. H. Yu, H. J. Gao. Controller optimization for multirate systems based on reinforcement learning. International Journal of Automation and Computing, vol. 17, no. 3, pp. 417\u2013427, 2020. DOI: https:\/\/doi.org\/10.1007\/s11633-020-1229-0.","journal-title":"International Journal of Automation and Computing"},{"key":"1429_CR42","doi-asserted-by":"publisher","unstructured":"S. X. Gu, T. Lillicrap, I. Sutskever, S. Levine. Continuous deep Q-learning with model-based acceleration. In Proceedings of the 33rd International Conference on International Conference on Machine Learning, New York, USA, pp. 2829\u20132838, 2016. DOI: https:\/\/doi.org\/10.5555\/3045390.3045688.","DOI":"10.5555\/3045390.3045688"},{"key":"1429_CR43","doi-asserted-by":"publisher","unstructured":"X. B. Peng, M. Van De Panne. Learning locomotion skills using deepRL: Does the choice of action space matter? In Proceedings of ACM SIGGRAPH\/Eurographics Symposium on Computer Animation, Los Angeles, USA, Article number 12, 2016. DOI: https:\/\/doi.org\/10.1145\/3099564.3099567.","DOI":"10.1145\/3099564.3099567"},{"key":"1429_CR44","unstructured":"N. P. Farazi, T. Ahamed, L. Barua, B. Zou. Deep reinforcement learning and transportation research: A comprehensive review. [Online], Available: https:\/\/arxiv.org\/abs\/2010.06187, 2020."},{"key":"1429_CR45","doi-asserted-by":"publisher","unstructured":"B. Y. Li, T. Lu, J. Y. Li, N. Lu, Y. H. Cai, S. Wang. ACDER: Augmented curiosity-driven experience replay. In Proceedings of IEEE International Conference on Robotics and Automation, Paris, France, pp. 4218\u20134224, 2020. 
DOI: https:\/\/doi.org\/10.1109\/ICRA40945.2020.9197421.","DOI":"10.1109\/ICRA40945.2020.9197421"},{"key":"1429_CR46","doi-asserted-by":"publisher","unstructured":"C. Banerjee, Z. Y. Chen, N. Noman. Improved soft actor-critic: Mixing prioritized off-policy samples with on-policy experiences. IEEE Transactions on Neural Networks and Learning Systems, to be published. DOI: https:\/\/doi.org\/10.1109\/TNNLS.2022.3174051.","DOI":"10.1109\/TNNLS.2022.3174051"},{"key":"1429_CR47","doi-asserted-by":"publisher","unstructured":"S. Qi, W. Lin, Z. Hong, H. Chen, W. Zhang. Perceptive autonomous stair climbing for quadruped robots. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, Prague, Czech Republic, pp. 2313\u20132320, 2021. DOI: https:\/\/doi.org\/10.1109\/IROS51168.2021.9636302.","DOI":"10.1109\/IROS51168.2021.9636302"},{"key":"1429_CR48","unstructured":"T. Haarnoja, A. Zhou, P. Abbeel, S. Levine. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In Proceedings of the 35th International Conference on Machine Learning, Stockholm, Sweden, pp. 1856\u20131865, 2018."},{"key":"1429_CR49","volume-title":"Reinforcement Learning: An Introduction","author":"R S Sutton","year":"1998","unstructured":"R. S. Sutton, A. G. Barto. Reinforcement Learning: An Introduction, Cambridge, UK: MIT Press, 1998."},{"issue":"21","key":"1429_CR50","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1016\/j.ifacol.2015.09.540","volume":"48","author":"X Han","year":"2015","unstructured":"X. Han, J. Stephant, G. Mourioux, D. Meizel. A ZMP based interval criterion for rollover-risk diagnosis. IFAC-PapersOnline, vol. 48, no. 21, pp. 277\u2013282, 2015. DOI: https:\/\/doi.org\/10.1016\/j.ifacol.2015.09.540.","journal-title":"IFAC-PapersOnline"},{"key":"1429_CR51","doi-asserted-by":"crossref","unstructured":"P. Y. Oudeyer. Computational theories of curiosity-driven learning. 
[Online], Available: https:\/\/arxiv.org\/abs\/1802.10546, 2018.","DOI":"10.31234\/osf.io\/3p8f6"},{"key":"1429_CR52","doi-asserted-by":"publisher","unstructured":"D. Pathak, P. Agrawal, A. A. Efros, T. Darrell. Curiosity-driven exploration by self-supervised prediction. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition Workshops, Honolulu, USA, pp. 488\u2013489. DOI: https:\/\/doi.org\/10.1109\/CVPRW.2017.70.","DOI":"10.1109\/CVPRW.2017.70"},{"key":"1429_CR53","unstructured":"D. P. Kingma, J. Ba. Adam: A method for stochastic optimization. [Online], Available: https:\/\/arxiv.org\/abs\/1412.6980v9, 2015."}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1429-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-023-1429-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1429-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T08:33:26Z","timestamp":1780562006000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-023-1429-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,22]]},"references-count":53,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1429"],"URL":"https:\/\/doi.org\/10.1007\/s11633-023-1429-5","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"value":"2731-538X","type":"print"},{"value":"2731-5398","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,22]]},"assertion":[{"value":"23 August 
2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 February 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declared that they have no conflicts of interest to this work.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations of conflict of interest"}}]}}