{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T18:41:47Z","timestamp":1773772907359,"version":"3.50.1"},"reference-count":82,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100002585","name":"Apollo-AIR Joint Research Center","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100002585","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160326","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"7554-7561","source":"Crossref","is-referenced-by-count":66,"title":["ADAPT: Action-aware Driving Caption Transformer"],"prefix":"10.1109","author":[{"given":"Bu","family":"Jin","sequence":"first","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,China"}]},{"given":"Xinyu","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University,China"}]},{"given":"Yupeng","family":"Zheng","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,China"}]},{"given":"Pengfei","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Computer Science and Technology,China"}]},{"given":"Hao","family":"Zhao","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University,China"}]},{"given":"Tong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology,China"}]},{"given":"Yuhang","family":"Zheng","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University,China"}]},{"given":"Guyue","family":"Zhou","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University,China"}]},{"given":"Jingjing","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2752489.2752491"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2983149"},{"key":"ref3","first-page":"237","article-title":"Conditional affordance learning for driving in urban environments","volume-title":"Conference on Robot Learning","author":"Sauer","year":"2018"},{"key":"ref4","article-title":"Action-based representation learning for autonomous driving","author":"Xiao","year":"2020","journal-title":"arXiv preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01530"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00718"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341647"},{"key":"ref8","article-title":"End to end learning for self-driving cars","author":"Bojarski","year":"2016","journal-title":"arXiv preprint"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref10","article-title":"Driving policy transfer via modularity and abstraction","author":"M\u00fcller","year":"2018","journal-title":"arXiv preprint"},{"key":"ref11","article-title":"Alvinn: An autonomous land vehicle in a neural network","volume":"1","author":"Pomerleau","year":"1988","journal-title":"Advances in neural information processing systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561904"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00886"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01417"},{"key":"ref15","first-page":"66","article-title":"Learning by cheating","volume-title":"Conference on Robot Learning","author":"Chen","year":"2020"},{"key":"ref16","first-page":"3145","article-title":"Can autonomous vehicles identify, recover from, and adapt to distribution shifts?","volume-title":"International Conference on Machine Learning","author":"Filos","year":"2020"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340641"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340639"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00942"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2020.3003121"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01131"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01178"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3013234"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.320"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01499"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_12"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561334"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01580"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_10"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58592-1_25"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811901"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.337"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33863-2_37"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1023\/A:1020346032608"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.61"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123420"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3120867"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3446792"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.497"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.111"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1117"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"ref43","article-title":"Translating videos to natural language using deep recurrent neural networks","author":"Venugopalan","year":"2014","journal-title":"arXiv preprint"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.512"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.496"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/164"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01088"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01329"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240667"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/88"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52729.2023.01838"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00500"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01670-0"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01607"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3197984"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3552482.3556555"},{"key":"ref59","article-title":"Toist: Task oriented instance segmentation transformer with noun-pronoun distillation","author":"Li","year":"2022","journal-title":"arXiv preprint"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3143224"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01263-4"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.99"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01742"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1561\/9781680836899"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2020.3043505"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abm4183"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aay4663"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_35"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01903"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.330"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794214"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00376"},{"key":"ref73","first-page":"947","article-title":"Intentnet: Learning to predict intention from raw sensor data","volume-title":"Conference on Robot Learning","author":"Casas","year":"2018"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"ref75","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv preprint"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref77","first-page":"65","article-title":"Meteor: An automatic metric for mt evaluation with improved correlation with human judgments","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization","author":"Banerjee"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.3115\/1218955.1219032"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.29007\/h68j"},{"key":"ref82","first-page":"1","article-title":"Carla: An open urban driving simulator","volume-title":"Conference on robot learning","author":"Dosovitskiy","year":"2017"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160326.pdf?arnumber=10160326","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T11:36:33Z","timestamp":1709292993000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160326\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":82,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160326","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}