{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T15:45:29Z","timestamp":1774367129420,"version":"3.50.1"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,27]]},"DOI":"10.1109\/iros51168.2021.9636681","type":"proceedings-article","created":{"date-parts":[[2021,12,16]],"date-time":"2021-12-16T20:45:38Z","timestamp":1639687538000},"page":"3724-3730","source":"Crossref","is-referenced-by-count":20,"title":["A Multi-Target Trajectory Planning of a 6-DoF Free-Floating Space Robot via Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Shengjie","family":"Wang","sequence":"first","affiliation":[]},{"given":"Xiang","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Yuxue","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref11","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017"},{"key":"ref12","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018"},{"key":"ref13","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aau5872"},{"key":"ref15","article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","author":"popov","year":"2017"},{"key":"ref16","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref17","article-title":"Learning Agile Robotic Locomotion Skills by Imitating Animals","author":"peng","year":"2020"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.073"},{"key":"ref19","first-page":"4754","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref28","article-title":"Project webpage","year":"0"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s40435-018-0459-2"},{"key":"ref27","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2017.11.007"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.actaastro.2015.03.008"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460497"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref7","first-page":"1","article-title":"Path Planning for a Space-Based Manipulator System Based on Quantum Genetic Algorithm","volume":"3207950","author":"chen","year":"2017","journal-title":"Robotics"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.actaastro.2017.04.031"},{"key":"ref9","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015"},{"key":"ref1","first-page":"441","article-title":"Zero reaction maneuver: flight validation with ETS-VII space robot and extension to kinematically redundant arm","author":"yoshida","year":"2001","journal-title":"Proceedings IEEE International Conference on Robotics and Automation"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.ast.2019.105657"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICUS48101.2019.8995991"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2018.8665049"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1155\/2016\/7819540"},{"key":"ref26","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015"},{"key":"ref25","article-title":"Implementation Matters in Deep Policy Gradients: A Case Study on PPO and TRPO","author":"engstrom","year":"2020"}],"event":{"name":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Prague, Czech Republic","start":{"date-parts":[[2021,9,27]]},"end":{"date-parts":[[2021,10,1]]}},"container-title":["2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9635848\/9635849\/09636681.pdf?arnumber=9636681","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:54:38Z","timestamp":1652201678000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9636681\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,27]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/iros51168.2021.9636681","relation":{},"subject":[],"published":{"date-parts":[[2021,9,27]]}}}