{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T05:58:34Z","timestamp":1771567114620,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/aim55361.2024.10636979","type":"proceedings-article","created":{"date-parts":[[2024,8,22]],"date-time":"2024-08-22T17:52:35Z","timestamp":1724349155000},"page":"709-714","source":"Crossref","is-referenced-by-count":4,"title":["Open-Source Reinforcement Learning Environments Implemented in MuJoCo with Franka Manipulator"],"prefix":"10.1109","author":[{"given":"Zichun","family":"Xu","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology,State Key Laboratory of Robotics and Systems,Harbin,Heilongjiang Province,China,150001"}]},{"given":"Yuntao","family":"Li","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology,State Key Laboratory of Robotics and Systems,Harbin,Heilongjiang Province,China,150001"}]},{"given":"Xiaohang","family":"Yang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology,State Key Laboratory of Robotics and Systems,Harbin,Heilongjiang Province,China,150001"}]},{"given":"Zhiyuan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology,State Key Laboratory of Robotics and Systems,Harbin,Heilongjiang Province,China,150001"}]},{"given":"Lei","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology,State Key Laboratory of Robotics and Systems,Harbin,Heilongjiang Province,China,150001"}]},{"given":"Jingdong","family":"Zhao","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology,State Key Laboratory of Robotics and Systems,Harbin,Heilongjiang Province,China,150001"}]}],"member":"263","reference":[{"key":"ref1","first-page":"589","article-title":"Motion Planner Augmented Reinforcement Learning for Robot Manipulation in Obstructed Environments","volume-title":"Conference on Robot Learning (CoRL)","author":"Yamada"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2023.3299051"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3116700"},{"key":"ref4","volume-title":"Gymnasium","author":"Towers","year":"2023"},{"key":"ref5","first-page":"2975","article-title":"Robopianist: Dexterous Piano Playing with Deep Reinforcement Learning","volume-title":"Conference on Robot Learning (CoRL)","author":"Zakka"},{"key":"ref6","article-title":"End-toend robotic reinforcement learning without reward engineering","author":"Singh","year":"2019","journal-title":"arXiv preprint arXiv:1904.07854"},{"key":"ref7","article-title":"Asymmetric Actor Critic for Image-Based Robot Learning","volume-title":"Robotics: Science and Systems Conference (RSS)","author":"Pinto"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793789"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.039"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560986"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref12","article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","author":"Plappert","year":"2018","journal-title":"arXiv preprint arXiv:1802.09464"},{"key":"ref13","first-page":"1025","article-title":"Relay Policy Learning: Solving Long-Horizon Tasks via Imitation and Reinforcement Learning","volume-title":"Conference on Robot Learning (CoRL)","author":"Gupta"},{"key":"ref14","article-title":"robosuite: A modular simulation framework and benchmark for robot learning","author":"Zhu","year":"2020","journal-title":"arXiv preprint arXiv:2009.12293"},{"key":"ref15","article-title":"panda-gym: Open-source goal-conditioned environments for robotic learning","author":"Gallou\u00e9dec","year":"2021","journal-title":"arXiv preprint arXiv:2106.13687"},{"key":"ref16","article-title":"Isaac Gym: High Performance GPU Based Physics Simulation For Robot Learning","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Makoviychuk"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2022.xviii.035"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.037"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139807"},{"key":"ref20","article-title":"Continuous control with deep reinforcement learning","volume-title":"International Conference on Learning Representations (ICLR)","author":"Lillicrap"},{"key":"ref21","first-page":"1856","article-title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor","volume-title":"International Conference on Machine Learning (ICML)","author":"Haarnoja"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref23","first-page":"5556","article-title":"Controlling Overestimation Bias with Truncated Mixture of Continuous Distributional Quantile Critics","volume-title":"International Conference on Machine Learning (ICML)","author":"Kuznetsov"},{"issue":"268","key":"ref24","first-page":"1","article-title":"Stable-baselines3: Reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"Journal of Machine Learning Research"},{"key":"ref25","first-page":"5048","article-title":"Hindsight Experience Replay","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Andrychowicz"},{"key":"ref26","first-page":"1582","article-title":"Addressing Function Approximation Error in Actor-Critic Methods","volume-title":"International Conference on Machine Learning (ICML)","author":"Fujimoto"}],"event":{"name":"2024 IEEE\/ASME International Conference on Advanced Intelligent Mechatronics (AIM)","location":"Boston, MA, USA","start":{"date-parts":[[2024,7,15]]},"end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Advanced Intelligent Mechatronics (AIM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10636941\/10636942\/10636979.pdf?arnumber=10636979","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,3]],"date-time":"2024-09-03T04:53:28Z","timestamp":1725339208000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10636979\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/aim55361.2024.10636979","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}