{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T18:34:32Z","timestamp":1776278072091,"version":"3.50.1"},"reference-count":52,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62201484"],"award-info":[{"award-number":["62201484"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"HKU Startup Fund"},{"name":"HKU Seed Fund for Basic Research"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1109\/lra.2024.3440097","type":"journal-article","created":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T13:57:34Z","timestamp":1723039054000},"page":"8186-8193","source":"Crossref","is-referenced-by-count":293,"title":["DriveGPT4: Interpretable End-to-End Autonomous Driving Via Large Language Model"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0700-2335","authenticated-orcid":false,"given":"Zhenhua","family":"Xu","sequence":"first","affiliation":[{"name":"The University of Hong Kong, Hong Kong, SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3884-9730","authenticated-orcid":false,"given":"Yujia","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6890-1049","authenticated-orcid":false,"given":"Enze","family":"Xie","sequence":"additional","affiliation":[{"name":"Huawei Noah&#x0027;s Ark Lab, Montreal, QC, Canada"}]},{"given":"Zhen","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Sydney, Camperdown, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3444-4588","authenticated-orcid":false,"given":"Yong","family":"Guo","sequence":"additional","affiliation":[{"name":"Huawei Noah&#x0027;s Ark Lab, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8560-9007","authenticated-orcid":false,"given":"Kwan-Yee K.","family":"Wong","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8492-3069","authenticated-orcid":false,"given":"Zhenguo","family":"Li","sequence":"additional","affiliation":[{"name":"Huawei Noah&#x0027;s Ark Lab, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8277-2706","authenticated-orcid":false,"given":"Hengshuang","family":"Zhao","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, SAR, 
China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2020.3045040"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11142162"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00169"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161508"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3264723"},{"key":"ref7","article-title":"Insightmapper: A closer look at inner-instance information for vectorized high-definition mapping","author":"Xu","year":"2023"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00700"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01712"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3435937"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1215"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01084"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3431437"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160326"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00110"},{"key":"ref16","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. 2019 Conf. North Amer. Chapter Assoc. Comput. Linguistics: Hum. Lang. Technol.","author":"Devlin","year":"2018"},{"key":"ref17","article-title":"Improving language understanding by generative pre-training","author":"Radford","year":"2018","journal-title":"OpenAI Blog"},{"key":"ref19","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"ref21","first-page":"12888","article-title":"BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"Proc. 39th Int. Conf. Mach. Learn.","author":"Li","year":"2022"},{"key":"ref22","article-title":"Palm-e: An embodied multimodal language model","author":"Driess","year":"2023"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.7759\/cureus.39305"},{"key":"ref25","article-title":"LlaVA-Med: Training a large language-and-vision assistant for biomedicine in one day","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Li","year":"2024"},{"key":"ref26","article-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models","author":"Zhu","year":"2023"},{"key":"ref27","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. Neural Inf. Process. 
Syst.","author":"Liu","year":"2023"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_35"},{"key":"ref29","article-title":"End to end learning for self-driving cars","author":"Bojarski","year":"2016"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3013234"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.320"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561334"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811901"},{"issue":"240","key":"ref35","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref36","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref37","article-title":"Instruction tuning with GPT-4","author":"Peng","year":"2023"},{"key":"ref38","first-page":"19730","article-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li","year":"2023"},{"key":"ref39","article-title":"Valley: Video assistant with large language model enhanced ability","author":"Luo","year":"2023"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-demo.49"},{"key":"ref41","article-title":"Chatvideo: A tracklet-centric multimodal and versatile video understanding system","author":"Wang","year":"2023"},{"key":"ref42","article-title":"Videochat: Chat-centric video understanding","author":"Li","year":"2023"},{"key":"ref43","article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","author":"Brohan","year":"2023"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW60836.2024.00102"},{"key":"ref45","article-title":"Language prompt for autonomous driving","author":"Wu","year":"2023"},{"key":"ref46","article-title":"Drivelm: Driving with graph visual question answering","author":"Sima","year":"2023"},{"key":"ref47","article-title":"Real-time flying object detection with YOLOv8","author":"Reis","year":"2023"},{"key":"ref48","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref51","first-page":"311","article-title":"Bleu: A method for automatic evaluation of machine translation","volume-title":"Proc. 40th Annu. Meeting Assoc. Comput. 
Linguistics","author":"Papineni","year":"2002"},{"key":"ref52","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"Text Summarization Branches Out"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/10638067\/10629039.pdf?arnumber=10629039","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T06:03:44Z","timestamp":1769493824000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10629039\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10]]},"references-count":52,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/lra.2024.3440097","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10]]}}}