{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T17:03:25Z","timestamp":1761930205059,"version":"3.28.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9533791","type":"proceedings-article","created":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T21:27:41Z","timestamp":1632173261000},"page":"1-8","source":"Crossref","is-referenced-by-count":7,"title":["Online Virtual Training in Soft Actor-Critic for Autonomous Driving"],"prefix":"10.1109","author":[{"given":"Maryam","family":"Savari","sequence":"first","affiliation":[]},{"given":"Yoonsuck","family":"Choe","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref11","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Int Conference on Machine Learning"},{"key":"ref12","first-page":"1","article-title":"A human-like agent based on a hybrid of reinforcement and imitation learning","author":"dossa","year":"2019","journal-title":"2019 International Joint Conference on Neural Networks (IJCNN)"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1109\/IJCNN.2019.8852307"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/ICCV.2019.00942"},{"key":"ref15","article-title":"Pretraining deep actor-critic reinforcement learning algorithms with expert demonstrations","author":"zhang","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref16","article-title":"Reinforcement learning from imperfect demonstrations","author":"gao","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref17","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref18","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref19","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Int Conference on Machine Learning"},{"key":"ref4","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Int Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1109\/IJCNN48605.2020.9207663"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/IJCNN.2019.8852110"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICRA.2019.8793742"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.23919\/ChiCC.2018.8482790"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1609\/aaai.v34i06.6602"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"7684","DOI":"10.1073\/pnas.1805770115","article-title":"On the future of transportation in an era of automated and autonomous vehicles","volume":"116","author":"hancock","year":"2019","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"ref9","article-title":"Improving exploration in soft-actor-critic with normalizing flows policies","author":"ward","year":"2019","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/IJCNN.2012.6252823"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/ROBIO.2017.8324787"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1609\/aaai.v33i01.33012462"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1609\/aaai.v33i01.33013739"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1007\/978-3-030-01234-2_36"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1007\/BFb0020167"},{"key":"ref26","article-title":"Approximately optimal approximate reinforcement learning","author":"kakade","year":"2002","journal-title":"In Proc 19th International Conference on Machine Learning"},{"key":"ref25","article-title":"Sample efficient actor-critic with experience replay","author":"wang","year":"2016","journal-title":"ArXiv Preprint"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09533791.pdf?arnumber=9533791","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:46:18Z","timestamp":1652197578000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9533791\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9533791","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}