{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T02:39:29Z","timestamp":1730255969848,"version":"3.28.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610197","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"5141-5147","source":"Crossref","is-referenced-by-count":0,"title":["Extremum-Seeking Action Selection for Accelerating Policy Optimization"],"prefix":"10.1109","author":[{"given":"Ya-Chien","family":"Chang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sicun","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.15607\/rss.2020.xvi.064"},{"volume-title":"Proceedings of The 2nd Conference on Robot Learning","author":"Kalashnikov","article-title":"Scalable deep reinforcement learning for vision-based robotic manipulation","key":"ref2"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/TVT.2018.2890773"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/ICRA48506.2021.9560769"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.15607\/RSS.2020.XVI.040"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICRA48506.2021.9560886"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.15607\/RSS.2023.XIX.085"},{"volume-title":"Thirty-seventh Conference on Neural Information Processing Systems","author":"Ganai","article-title":"Iterative reachability estimation for safe reinforcement learning","key":"ref8"},{"key":"ref9","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"ICML 2015","author":"Schulman"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.32657\/10356\/90191"},{"year":"2017","author":"Schulman","article-title":"Proximal policy optimization algorithms","key":"ref11"},{"key":"ref12","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref13","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"80","author":"Haarnoja"},{"year":"2020","author":"Engstrom","article-title":"Implementation matters in deep policy gradients: A case study on ppo and trpo","key":"ref14"},{"year":"2020","author":"Hsu","article-title":"Revisiting design choices in proximal policy optimization","key":"ref15"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1177\/0278364920987859"},{"key":"ref17","article-title":"Learning to utilize shaping rewards: A new approach of reward shaping","author":"Hu","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref18","first-page":"15 281","article-title":"Unpacking reward shaping: Understanding the benefits of reward engineering on sample complexity","volume":"35","author":"Gupta","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1002\/0471669784"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1007\/978-1-4471-2224-1"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.3166\/ejc.15.331-347"},{"key":"ref22","first-page":"7059","article-title":"Dual policy iteration","volume-title":"Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr\u00e9al, Canada","author":"Sun"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/IROS.2012.6386109"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1016\/j.arcontrol.2012.03.004"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.15607\/RSS.2021.XVII.056"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1109\/CDC42340.2020.9304242"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1109\/TAC.2023.3290037"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/DDCLS58216.2023.10166569"},{"key":"ref29","article-title":"A survey of exploration methods in reinforcement learning","author":"Amin","year":"2021","journal-title":"CoRR"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.7763\/IJMLC.2015.V5.489"},{"volume-title":"The Eleventh International Conference on Learning Representations","author":"Eberhard","article-title":"Pink noise is all you need: Colored noise exploration in deep reinforcement learning","key":"ref31"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.2478\/s13230-010-0002-4"},{"key":"ref33","article-title":"Improving exploration in evolution strategies for deep reinforcement learning via a population of novelty-seeking agents","author":"Conti","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"International Conference on Learning Representations","author":"Fortunato","article-title":"Noisy networks for exploration","key":"ref34"},{"volume-title":"6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings","author":"Plappert","article-title":"Parameter space noise for exploration","key":"ref35"},{"key":"ref36","first-page":"7611","article-title":"Maven: Multi-agent variational exploration","author":"Mahajan","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"27","key":"ref37","first-page":"949","article-title":"Natural evolution strategies","volume-title":"Journal of Machine Learning Research","volume":"15","author":"Wierstra","year":"2014"},{"year":"2016","author":"Brockman","article-title":"Openai gym","key":"ref38"},{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1109\/ICRA.2015.7140074"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1007\/s10846-019-01085-z"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610197.pdf?arnumber=10610197","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:45:38Z","timestamp":1723268738000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610197\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610197","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}