{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T16:49:39Z","timestamp":1774025379501,"version":"3.50.1"},"reference-count":30,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Agency for Science, Technology and Research (A*STAR) under Advanced Manufacturing and Engineering (AME) Young Individual Research","award":["A2084c0156"],"award-info":[{"award-number":["A2084c0156"]}]},{"DOI":"10.13039\/501100001475","name":"Start-Up Grant, Nanyang Technological University, Singapore","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001475","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1109\/tnnls.2022.3177685","type":"journal-article","created":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T20:25:40Z","timestamp":1654892740000},"page":"855-869","source":"Crossref","is-referenced-by-count":91,"title":["Prioritized Experience-Based Reinforcement Learning With Human Guidance for Autonomous Driving"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7336-4492","authenticated-orcid":false,"given":"Jingda","family":"Wu","sequence":"first","affiliation":[{"name":"School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1592-7215","authenticated-orcid":false,"given":"Zhiyu","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7212-027X","authenticated-orcid":false,"given":"Wenhui","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6897-4512","authenticated-orcid":false,"given":"Chen","family":"Lv","sequence":"additional","affiliation":[{"name":"School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3071727"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461233"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2020.3014599"},{"key":"ref5","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref7","article-title":"Proximal policy optimization algorithms","volume-title":"arXiv:1707.06347","author":"Schulman","year":"2017"},{"key":"ref8","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-019-0025-4"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/nature14540"},{"key":"ref11","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","volume-title":"arXiv:1707.08817","author":"Vecerik","year":"2017"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref13","first-page":"6611","article-title":"Guided exploration with proximal policy optimization using a single demonstration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Libardi"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2016.2628365"},{"key":"ref16","first-page":"2285","article-title":"Interactive learning from policy-dependent human feedback","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"MacGlashan"},{"key":"ref17","first-page":"8022","article-title":"Reward learning from human preferences and demonstrations in Atari","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Ibarz"},{"key":"ref18","article-title":"Trial without error: Towards safe reinforcement learning via human intervention","volume-title":"17th Int. Conf. Auton. Agents MultiAgent Syst","author":"Saunders"},{"key":"ref19","article-title":"Intervention aided reinforcement learning for safe and practical policy optimization in navigation","volume-title":"Proc. 2nd Conf. Robot Learn., Mach. Learn. Res.","author":"Wang"},{"key":"ref20","article-title":"Making efficient use of demonstrations to solve hard exploration problems","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Gulcehre"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aat1186"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"ref24","article-title":"Prioritized experience replay","volume-title":"arXiv:1511.05952","author":"Schaul","year":"2015"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref26","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref27","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist., JMLR Workshop Conf.","author":"Ross"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2012.6232131"},{"key":"ref29","first-page":"1","article-title":"CARLA: An open urban driving simulator","volume-title":"Proc. Conf. Robot Learn.","author":"Dosovitskiy"},{"key":"ref30","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proc. ICML","volume":"99","author":"Ng"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10381493\/09793564.pdf?arnumber=9793564","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:13:38Z","timestamp":1705018418000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9793564\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1]]},"references-count":30,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2022.3177685","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1]]}}}