{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:58:50Z","timestamp":1777568330882,"version":"3.51.4"},"reference-count":54,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,1]]},"DOI":"10.1109\/iros55552.2023.10342038","type":"proceedings-article","created":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T19:17:55Z","timestamp":1702495075000},"page":"7553-7560","source":"Crossref","is-referenced-by-count":60,"title":["Imitation Is Not Enough: Robustifying Imitation with Reinforcement Learning for Challenging Driving Scenarios"],"prefix":"10.1109","author":[{"given":"Yiren","family":"Lu","sequence":"first","affiliation":[{"name":"Waymo Research"}]},{"given":"Justin","family":"Fu","sequence":"additional","affiliation":[{"name":"Waymo Research"}]},{"given":"George","family":"Tucker","sequence":"additional","affiliation":[{"name":"Google Research, Brain Team"}]},{"given":"Xinlei","family":"Pan","sequence":"additional","affiliation":[{"name":"Waymo Research"}]},{"given":"Eli","family":"Bronstein","sequence":"additional","affiliation":[{"name":"Waymo Research"}]},{"given":"Rebecca","family":"Roelofs","sequence":"additional","affiliation":[{"name":"Waymo Research"}]},{"given":"Benjamin","family":"Sapp","sequence":"additional","affiliation":[{"name":"Waymo Research"}]},{"given":"Brandyn","family":"White","sequence":"additional","affiliation":[{"name":"Waymo Research"}]},{"given":"Aleksandra","family":"Faust","sequence":"additional","affiliation":[{"name":"Google Research, Brain Team"}]},{"given":"Shimon","family":"Whiteson","sequence":"additional","affiliation":[{"name":"Waymo Research"}]},{"given":"Dragomir","family":"Anguelov","sequence":"additional","affiliation":[{"name":"Waymo Research"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[{"name":"Google Research, Brain Team"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Alvinn: An autonomous land vehicle in a neural network","volume":"1","author":"Pomerleau","year":"1988","journal-title":"Advances in neural information processing systems"},{"key":"ref2","article-title":"End to end learning for self-driving cars","author":"Bojarski","year":"2016","journal-title":"arXiv preprint"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC55140.2022.9921749"},{"key":"ref5","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings","author":"Ross","year":"2011"},{"key":"ref6","article-title":"Causal confusion in imitation learning","volume":"32","author":"De Haan","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","article-title":"Virtual to real re-inforcement learning for autonomous driving","author":"Pan","year":"2017","journal-title":"arXiv preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_36"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01494"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2022.103829"},{"key":"ref11","article-title":"Embedding synthetic off-policy experience for autonomous driving via zero-shot curricula","volume-title":"6th Annual Conference on Robot Learning","author":"Bronstein","year":"2022"},{"key":"ref12","article-title":"Multipath: Multiple probabilistic anchor trajectory hypotheses for behavior prediction","author":"Chai","year":"2019","journal-title":"arXiv preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00291"},{"key":"ref14","article-title":"Tra-jectron+i-: Multi-agent generative trajectory forecasting with het-erogeneous data for control","author":"Salzmann","year":"2020","journal-title":"arXiv preprint"},{"key":"ref15","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proceedings of 17th International Conference on Machine Learning","author":"Ng","year":"2000"},{"key":"ref16","article-title":"Generative adversarial imitation learning","volume":"29","author":"Ho","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref17","article-title":"Model-based adversarial imitation learning","author":"Baram","year":"2016","journal-title":"arXiv preprint"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref19","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume":"abs11801.01290","author":"Haarnoja","year":"2018","journal-title":"CoRR"},{"key":"ref20","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref21","first-page":"20132","article-title":"A minimalist approach to offline rein-forcement learning","volume":"34","author":"Fujimoto","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2018.xiv.049"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01494"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811576"},{"key":"ref26","article-title":"Wayformer: Motion forecasting via simple & efficient attention networks","author":"Nayakanti","year":"2022","journal-title":"arXiv preprint"},{"key":"ref27","article-title":"Titrated: Learned human driving behavior without infractions via amortized inference","author":"Lioutas","year":"2022","journal-title":"Transactions on Machine Learning Research"},{"key":"ref28","article-title":"Deep imitative models for flexible inference, planning, and control","author":"Rhinehart","year":"2018","journal-title":"arXiv preprint"},{"key":"ref29","article-title":"Gohome: Graph-oriented heatmap output for future motion estimation","author":"Gilles","year":"2021","journal-title":"arXiv preprint"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_32"},{"key":"ref31","article-title":"Scene transformer: A unified multi-task model for behavior prediction and planning","volume":"abs\/2106.08417","author":"Ngiam","year":"2021","journal-title":"CoRR"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461233"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2018.8500556"},{"key":"ref35","article-title":"Nocturne: a scalable driving benchmark for bringing multi-agent learning one step closer to the real world","author":"Vinitsky","year":"2022","journal-title":"arXiv preprint"},{"key":"ref36","article-title":"Drivergym: Democratising reinforcement learning for autonomous driving","author":"Kothari","year":"2021","journal-title":"arXiv preprint"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3190471"},{"key":"ref38","article-title":"Critic sequential monte carlo","author":"Lioutas","year":"2022","journal-title":"arXiv preprint"},{"key":"ref39","article-title":"nuplan: A closed-loop ml-based planning benchmark for autonomous vehicles","author":"Caesar","year":"2021","journal-title":"arXiv preprint"},{"key":"ref40","first-page":"1","article-title":"Carla: An open urban driving simulator","volume-title":"Conference on robot learning","author":"Dosovitskiy"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/2898363"},{"key":"ref42","volume-title":"An environment for autonomous driving decision-making","author":"Leurent","year":"2018"},{"key":"ref43","article-title":"Interaction dataset: An international, adversarial and cooperative motion dataset in interactive driving scenarios with semantic maps","author":"Zhan","year":"2019","journal-title":"arXiv preprint"},{"key":"ref44","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"Vecerik","year":"2017","journal-title":"arXiv preprint"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2018.xiv.049"},{"key":"ref46","article-title":"Safe, multi-agent, reinforcement learning for autonomous driving","author":"Shalev-Shwartz","year":"2016","journal-title":"ar Xiv preprint"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206247"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-1433-9"},{"key":"ref49","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref50","first-page":"4651","article-title":"Perceiver: General perception with iterative attention","volume-title":"International conference on machine learning","author":"Jaegle"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390199"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.7249\/RR1478"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11687"},{"key":"ref54","article-title":"Fingerprint policy optimisation for robust reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Paul","year":"2019"}],"event":{"name":"2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Detroit, MI, USA","start":{"date-parts":[[2023,10,1]]},"end":{"date-parts":[[2023,10,5]]}},"container-title":["2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10341341\/10341342\/10342038.pdf?arnumber=10342038","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,20]],"date-time":"2023-12-20T00:15:00Z","timestamp":1703031300000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10342038\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,1]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/iros55552.2023.10342038","relation":{},"subject":[],"published":{"date-parts":[[2023,10,1]]}}}