{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:47:33Z","timestamp":1777654053421,"version":"3.51.4"},"reference-count":19,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"The National Key R&D Program of China","award":["2021YFF0306405"],"award-info":[{"award-number":["2021YFF0306405"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Automat. Mag."],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1109\/mra.2023.3262461","type":"journal-article","created":{"date-parts":[[2023,5,1]],"date-time":"2023-05-01T18:31:33Z","timestamp":1682965893000},"page":"57-66","source":"Crossref","is-referenced-by-count":32,"title":["Mastering the Complex Assembly Task With a Dual-Arm Robot: A Novel Reinforcement Learning Method"],"prefix":"10.1109","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6131-2057","authenticated-orcid":false,"given":"Daqi","family":"Jiang","sequence":"first","affiliation":[{"name":"School of Mechanical Engineering and Automation, Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7639-6967","authenticated-orcid":false,"given":"Hong","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Mechanical Engineering and Automation, Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1573-5419","authenticated-orcid":false,"given":"Yanzheng","family":"Lu","sequence":"additional","affiliation":[{"name":"School of Mechanical Engineering and Automation, Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197326"},{"key":"ref12","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref15","first-page":"807","article-title":"Rectified linear units improve restricted Boltzmann machines","author":"nair","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref14","first-page":"1645","article-title":"Tactile sim-to-real policy transfer via real-to-sim image translation","author":"church","year":"0","journal-title":"Proc Conf Robot Learn"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aau5872"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.010"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aat8414"},{"key":"ref17","first-page":"4572","article-title":"Generative adversarial imitation learning","volume":"29","author":"ho","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref16","first-page":"2017","article-title":"Spatial transformer networks","volume":"28","author":"jaderberg","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref19","author":"hershberger","year":"2019","journal-title":"RViz 3D visualization tool for ROS"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1006\/cviu.1997.0547"},{"key":"ref8","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017"},{"key":"ref7","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2014.2320500"},{"key":"ref4","first-page":"1089","article-title":"Beyond pick-and-place: Tackling robotic stacking of diverse shapes","volume":"164","author":"lee","year":"0","journal-title":"Proc 5th Annu Conf Robot Learn"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2021.102306"},{"key":"ref6","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref5","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013"}],"container-title":["IEEE Robotics &amp; Automation Magazine"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/100\/10153106\/10113133.pdf?arnumber=10113133","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,3]],"date-time":"2023-07-03T18:00:24Z","timestamp":1688407224000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10113133\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":19,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/mra.2023.3262461","relation":{},"ISSN":["1070-9932","1558-223X"],"issn-type":[{"value":"1070-9932","type":"print"},{"value":"1558-223X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6]]}}}