{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:30:00Z","timestamp":1774539000790,"version":"3.50.1"},"reference-count":74,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&#x0026;D Program of China","award":["2022ZD0114900"],"award-info":[{"award-number":["2022ZD0114900"]}]},{"name":"Beijing Municipal Science &#x0026; Technology Commission","award":["Z231100007423015"],"award-info":[{"award-number":["Z231100007423015"]}]},{"name":"Young Elite Scientists Sponsorship Program by CAST","award":["2022QNRC002"],"award-info":[{"award-number":["2022QNRC002"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1109\/tpami.2023.3339515","type":"journal-article","created":{"date-parts":[[2023,12,5]],"date-time":"2023-12-05T18:21:12Z","timestamp":1701800472000},"page":"2804-2818","source":"Crossref","is-referenced-by-count":32,"title":["Bi-DexHands: Towards Human-Level Bimanual Dexterous Manipulation"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0033-492X","authenticated-orcid":false,"given":"Yuanpei","family":"Chen","sequence":"first","affiliation":[{"name":"Institute for AI, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2440-6438","authenticated-orcid":false,"given":"Yiran","family":"Geng","sequence":"additional","affiliation":[{"name":"Institute for AI, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0428-4552","authenticated-orcid":false,"given":"Fangwei","family":"Zhong","sequence":"additional","affiliation":[{"name":"National Key Laboratory of General Artificial Intelligence and Beijing Institute for General Artificial Intelligence, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3769-2077","authenticated-orcid":false,"given":"Jiaming","family":"Ji","sequence":"additional","affiliation":[{"name":"Institute for AI, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6076-5000","authenticated-orcid":false,"given":"Jiechuang","family":"Jiang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3967-2704","authenticated-orcid":false,"given":"Zongqing","family":"Lu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7984-9909","authenticated-orcid":false,"given":"Hao","family":"Dong","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8132-5613","authenticated-orcid":false,"given":"Yaodong","family":"Yang","sequence":"additional","affiliation":[{"name":"Institute for AI, Peking University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794102"},{"key":"ref2","article-title":"Solving rubiks cube with a robot hand","author":"Akkaya","year":"2019"},{"key":"ref3","first-page":"297","article-title":"A system for general in-hand object re-orientation","volume-title":"Proc. Conf. Robot Learn.","author":"Chen"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1126\/science.aat8414"},{"key":"ref5","first-page":"1","article-title":"Learning to coordinate manipulation skills via skill behavior diversification","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lee"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10161571"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10160667"},{"key":"ref8","article-title":"GraspARL: Dynamic grasping via adversarial reinforcement learning","author":"Wu","year":"2022"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206046"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2017.2693391"},{"key":"ref11","first-page":"1","article-title":"Learning to poke by poking: Experiential learning of intuitive physics","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Agrawal"},{"key":"ref12","article-title":"Reinforcement learning for pivoting task","author":"Antonova","year":"2017"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989467"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00434"},{"key":"ref15","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Yu"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974707"},{"key":"ref17","article-title":"robosuite: A modular simulation framework and benchmark for robot learning","author":"Zhu","year":"2020"},{"key":"ref18","article-title":"Bayley-III clinical use and interpretation","author":"Bayley","year":"2006","journal-title":"Citeseer"},{"key":"ref19","first-page":"1","article-title":"Isaac gym: High performance GPU based physics simulation for robot learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst. Datasets Benchmarks","author":"Makoviychuk"},{"key":"ref20","article-title":"The starcraft multi-agent challenge","author":"Samvelyan","year":"2019"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917700714"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01111"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2023.3339515\/mm1"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2018.xiv.049"},{"key":"ref25","article-title":"Deepmind control suite","author":"Tassa","year":"2018"},{"key":"ref26","first-page":"1496","article-title":"Solving challenging dexterous manipulation tasks with trajectory optimisation and reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Charlesworth"},{"key":"ref27","first-page":"1","article-title":"CausalWorld: A robotic manipulation benchmark for causal structure and transfer learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ahmed"},{"key":"ref28","first-page":"1","article-title":"Brax - A differentiable physics engine for large scale rigid body simulation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst. Datasets Benchmarks","author":"Freeman"},{"key":"ref29","article-title":"Openai GYM","author":"Brockman","year":"2016"},{"key":"ref30","article-title":"D4RL: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560986"},{"key":"ref32","first-page":"12208","article-title":"FACMAC: Factored multi-agent centralised policy gradients","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Peng"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2000.844081"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2013.2289018"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989394"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/AIM.2016.7576796"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2017.1365011"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/Humanoids.2011.6100824"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2012.07.005"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-021-27261-0"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2000.844067"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487156"},{"key":"ref43","article-title":"Model predictive path integral control using covariance variable importance sampling","author":"Williams","year":"2015"},{"key":"ref44","article-title":"Generalization in dexterous manipulation via geometry-aware multi-task learning","author":"Huang","year":"2021"},{"key":"ref45","article-title":"Masked visual pre-training for motor control","author":"Xiao","year":"2022"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3196104"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_33"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794033"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561491"},{"key":"ref50","volume-title":"Bayley-III Clin. Use and Interpretation","author":"Weiss","year":"2010"},{"key":"ref51","article-title":"FIRST WORDS Project, \u201c16 actions with objects by 16 months","author":"Wetherby","year":"2018"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1542\/peds.2021-052138"},{"key":"ref53","article-title":"Shadowrobot dexterous hand","year":"2005"},{"key":"ref54","article-title":"An overview of multi-agent reinforcement learning from game theoretical perspective","author":"Yang","year":"2020"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwac256"},{"key":"ref56","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref57","article-title":"Soft actor-critic algorithms and applications","author":"Haarnoja","year":"2018"},{"key":"ref58","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref59","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lillicrap"},{"key":"ref60","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref61","first-page":"13458","article-title":"Settling the variance of multi-agent policy gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kuba"},{"key":"ref62","first-page":"1","article-title":"Trust region policy optimisation in multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kuba"},{"key":"ref63","article-title":"Heterogeneous-agent mirror learning: A continuum of solutions to cooperative marl","author":"Kuba","year":"2022"},{"key":"ref64","article-title":"The surprising effectiveness of PPO in cooperative, multi-agent games","author":"Yu","year":"2021"},{"key":"ref65","article-title":"Is independent learning all you need in the starcraft multi-agent challenge?","author":"Schr\u00f6der de Witt","year":"2020"},{"key":"ref66","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lowe"},{"key":"ref67","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Fujimoto"},{"key":"ref68","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref69","first-page":"1","article-title":"Offline reinforcement learning with implicit q-learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kostrikov"},{"key":"ref70","article-title":"Settling the bias and variance of meta-gradient estimation for meta-reinforcement learning","author":"Liu","year":"2021"},{"key":"ref71","article-title":"Rl $^{2}$2: Fast reinforcement learning via slow reinforcement learning","author":"Duan","year":"2016"},{"key":"ref72","first-page":"1","article-title":"ProMP: Proximal meta-policy search","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Rothfuss"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1093\/acprof:oso\/9780195372090.003.0006"},{"key":"ref74","first-page":"77","article-title":"PointNet: Deep learning on point sets for 3D classification and segmentation","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit.","author":"Qi"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10490207\/10343126.pdf?arnumber=10343126","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,9]],"date-time":"2024-04-09T19:32:07Z","timestamp":1712691127000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10343126\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":74,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3339515","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5]]}}}