{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T10:06:31Z","timestamp":1776679591211,"version":"3.51.2"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100005046","name":"Natural Science Foundation of Heilongjiang Province","doi-asserted-by":"publisher","award":["YQ2020E028"],"award-info":[{"award-number":["YQ2020E028"]}],"id":[{"id":"10.13039\/501100005046","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1109\/lra.2024.3349934","type":"journal-article","created":{"date-parts":[[2024,1,4]],"date-time":"2024-01-04T20:08:29Z","timestamp":1704398909000},"page":"1811-1818","source":"Crossref","is-referenced-by-count":13,"title":["Learning Locomotion for Quadruped Robots via Distributional Ensemble Actor-Critic"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2049-8921","authenticated-orcid":false,"given":"Sicen","family":"Li","sequence":"first","affiliation":[{"name":"College of Mechanical and Electrical Engineering, Harbin Engineering University, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1307-2700","authenticated-orcid":false,"given":"Yiming","family":"Pang","sequence":"additional","affiliation":[{"name":"College of Mechanical and Electrical Engineering, Harbin Engineering University, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0316-7590","authenticated-orcid":false,"given":"Panju","family":"Bai","sequence":"additional","affiliation":[{"name":"College of Mechanical and Electrical Engineering, Harbin Engineering University, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0695-0196","authenticated-orcid":false,"given":"Jiawei","family":"Li","sequence":"additional","affiliation":[{"name":"College of Shipbuilding Engineering, Harbin Engineering University, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0313-1868","authenticated-orcid":false,"given":"Zhaojin","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Mechanical and Electrical Engineering, Harbin Engineering University, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8287-0279","authenticated-orcid":false,"given":"Shihao","family":"Hu","sequence":"additional","affiliation":[{"name":"College of Mechanical and Electrical Engineering, Harbin Engineering University, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6470-4889","authenticated-orcid":false,"given":"Liquan","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Mechanical and Electrical Engineering, Harbin Engineering University, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0741-9838","authenticated-orcid":false,"given":"Gang","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Shipbuilding Engineering, Harbin Engineering University, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"773","article-title":"Fast and efficient locomotion via learned gait transitions","volume-title":"Proc. Conf. Robot Learn.","author":"Yang","year":"2022"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2022.xviii.022"},{"key":"ref3","article-title":"Understanding domain randomization for sim-to-real transfer","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen","year":"2021"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2018.xiv.010"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"ref6","article-title":"Fully parameterized quantile function for distributional reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Yang","year":"2019"},{"key":"ref7","article-title":"Randomized ensembled double Q-learning: Learning fast without a model","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen","year":"2020"},{"key":"ref8","first-page":"5556","article-title":"Controlling overestimation bias with truncated mixture of continuous distributional quantile critics","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kuznetsov","year":"2020"},{"key":"ref9","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref10","first-page":"1","article-title":"On the effect of auxiliary tasks on representation dynamics","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Lyle","year":"2021"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abk2822"},{"key":"ref12","first-page":"786","article-title":"Towards legged locomotion on steep planetary terrain","volume-title":"Proc. IEEE\/RSJ 36th Int. Conf. Intell. Robots Syst.","author":"Weibel","year":"2023"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460904"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1002\/rob.22197"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342062"},{"key":"ref17","first-page":"1096","article-title":"Implicit quantile networks for distributional reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Dabney","year":"2018"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.1109\/ICRA57147.2024.10610137","article-title":"Learning risk-aware quadrupedal locomotion using distributional reinforcement learning","volume-title":"Proc. 41st IEEE Conf. Robot. Automat.","author":"Schneider","year":"2024"},{"key":"ref19","first-page":"6131","article-title":"Sunrise: A simple unified framework for ensemble learning in deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee","year":"2021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1561\/2200000071"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3151396"},{"key":"ref23","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schaul","year":"2015"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref25","article-title":"Better exploration with optimistic actor critic","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Ciosek","year":"2019"},{"key":"ref26","article-title":"Pybullet, a python module for physics simulation for games, robotics and machine learning","author":"Coumans","year":"2016"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2021.xvii.011"},{"key":"ref28","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist.","author":"Ross","year":"2011"},{"key":"ref29","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abc5986"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3290509"},{"key":"ref32","article-title":"Highly dynamic quadruped locomotion via whole-body impulse control and model predictive control","author":"Kim","year":"2019"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487541"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/10360389\/10380686.pdf?arnumber=10380686","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:31:01Z","timestamp":1734982261000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10380686\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2]]},"references-count":33,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/lra.2024.3349934","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2]]}}}