{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T15:00:50Z","timestamp":1769266850116,"version":"3.49.0"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"RIAIoT Project: A Human-Guided Digital Twin Paradigm for the Training of Connected Autonomous Vehicles","award":["P0050293"],"award-info":[{"award-number":["P0050293"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Intell. Transport. Syst."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1109\/tits.2025.3591239","type":"journal-article","created":{"date-parts":[[2025,7,31]],"date-time":"2025-07-31T18:32:45Z","timestamp":1753986765000},"page":"19493-19507","source":"Crossref","is-referenced-by-count":4,"title":["Toward Multi-Task Generalization in Autonomous Navigation: A Human-in-the-Loop Adversarial Reinforcement Learning With Diffusion Policy"],"prefix":"10.1109","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7172-8152","authenticated-orcid":false,"given":"Dong","family":"Hu","sequence":"first","affiliation":[{"name":"Department of Industrial and Systems Engineering, The Hong Kong Polytechnic University, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3023-4388","authenticated-orcid":false,"given":"Chao","family":"Huang","sequence":"additional","affiliation":[{"name":"Department of Industrial and Systems Engineering, The Hong Kong Polytechnic University, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7336-4492","authenticated-orcid":false,"given":"Jingda","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5056-171X","authenticated-orcid":false,"given":"Xin","family":"Yuan","sequence":"additional","affiliation":[{"name":"School of Electrical and Mechanical Engineering, The University of Adelaide, Adelaide, SA, Australia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.ado1010"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610665"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC58415.2024.10919536"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-061623-094742"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2022.3187542"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3314762"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adg1462"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/tte.2025.3590199"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2023.3259688"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3257549"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3312453"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/jiot.2025.3588176"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3261988"},{"key":"ref14","article-title":"Maximum entropy reinforcement learning with diffusion policy","author":"Dong","year":"2025","journal-title":"arXiv:2502.11612"},{"key":"ref15","first-page":"35550","article-title":"Imitating human behaviour with diffusion models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Pearce"},{"key":"ref16","first-page":"35162","article-title":"Diffusion policies as an expressive policy class for offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Wang"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1708"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793698"},{"key":"ref19","first-page":"1001","article-title":"Trial without error: Towards safe reinforcement learning via human intervention","volume-title":"Proc. 21st Int. Conf. Artif. Intell. Statist.","author":"Saunders"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.055"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3177685"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2024.3384992"},{"key":"ref23","article-title":"Pre-trained transformer-enabled strategies with human-guided fine-tuning for end-to-end navigation of autonomous vehicles","author":"Hu","year":"2024","journal-title":"arXiv:2402.12666"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3524609"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3420959"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.commtr.2024.100127"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref28","first-page":"46323","article-title":"Synthetic experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lu"},{"key":"ref29","first-page":"38245","article-title":"Reasoning with latent diffusion in offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Venkatraman"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3626235"},{"key":"ref31","first-page":"35528","article-title":"Planning with diffusion for flexible behavior synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"J\u00e4nner"},{"key":"ref32","article-title":"Diffusion models for reinforcement learning: A survey","author":"Zhu","year":"2023","journal-title":"arXiv:2311.01223"},{"key":"ref33","first-page":"38431","article-title":"Policy representation via diffusion probability model for reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Yang"},{"key":"ref34","first-page":"41163","article-title":"Learning a diffusion model policy from rewards via Q-score matching","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","volume":"235","author":"Psenka"},{"issue":"12","key":"ref35","first-page":"22648","article-title":"A survey of deep learning-based visual navigation for autonomous robots","volume":"23","author":"Yang","year":"2022","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"ref36","article-title":"Position-based deep reinforcement learning for forest robot navigation using depth images","volume":"155","author":"Martini","year":"2022","journal-title":"Robot. Auto. Syst."},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111358"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3412638"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2025.3577523"},{"key":"ref40","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume-title":"Proc. Adv. Neural Inf. Process. Sys. (NIPS)","volume":"34","author":"Xie"},{"key":"ref41","first-page":"23272","article-title":"Tackling the generative learning trilemma with denoising diffusion GANs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Xiao"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-58347-1_10"},{"key":"ref43","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref44","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Fujimoto"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref46","first-page":"9621","article-title":"Learning to walk in the real world with minimal human effort","volume-title":"Proc. IEEE Int. Conf. Robot. Autom. (ICRA)","author":"Ha"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2016.11.007"}],"container-title":["IEEE Transactions on Intelligent Transportation Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6979\/11241045\/11106367.pdf?arnumber=11106367","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T18:43:16Z","timestamp":1762972996000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11106367\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":47,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tits.2025.3591239","relation":{},"ISSN":["1524-9050","1558-0016"],"issn-type":[{"value":"1524-9050","type":"print"},{"value":"1558-0016","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11]]}}}