{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T16:21:31Z","timestamp":1773332491458,"version":"3.50.1"},"reference-count":377,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100019033","name":"Key-Area Research and Development Program of Guangdong Province","doi-asserted-by":"publisher","award":["2023B0909040001"],"award-info":[{"award-number":["2023B0909040001"]}],"id":[{"id":"10.13039\/501100019033","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Science and Technology Program","award":["KJZD20231023100304010"],"award-info":[{"award-number":["KJZD20231023100304010"]}]},{"DOI":"10.13039\/501100013114","name":"National Key Research and Development Program","doi-asserted-by":"publisher","award":["2022YFB2503203"],"award-info":[{"award-number":["2022YFB2503203"]}],"id":[{"id":"10.13039\/501100013114","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Intell. Transport. Syst."],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1109\/tits.2025.3624830","type":"journal-article","created":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T18:46:33Z","timestamp":1762195593000},"page":"119-144","source":"Crossref","is-referenced-by-count":1,"title":["Delving Into the Secrets of BEV 3D Object Detection in Autonomous Driving: A Comprehensive Survey"],"prefix":"10.1109","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-5494-0220","authenticated-orcid":false,"given":"Haoyu","family":"Li","sequence":"first","affiliation":[{"name":"National Engineering Research Center for Electric Vehicles, Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2067-7840","authenticated-orcid":false,"given":"Yueran","family":"Zhao","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Electric Vehicles, Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5701-1720","authenticated-orcid":false,"given":"Jiaru","family":"Zhong","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Electric Vehicles, Beijing Institute of Technology, Beijing, China"}]},{"given":"Bo","family":"Wang","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Electric Vehicles, Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9324-0892","authenticated-orcid":false,"given":"Chao","family":"Sun","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Electric Vehicles, Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9524-9367","authenticated-orcid":false,"given":"Fengchun","family":"Sun","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Electric Vehicles, Beijing Institute of Technology, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01790-1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3333838"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3264658"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3439557"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3307157"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IV55152.2023.10186647"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3005434"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3346386"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.3390\/s22114208"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2024.3380244"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3432634"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/tiv.2024.3395783"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2023.3298534"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/jproc.2025.3600903"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2023.3283864"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3153815"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3436012"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3122865"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.352"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00141"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569552"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00895"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"ref29","article-title":"One million scenes for autonomous driving: ONCE dataset","author":"Mao","year":"2021","journal-title":"arXiv:2106.11037"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3192802"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02067"},{"key":"ref32","article-title":"BAAI-VANJEE roadside dataset: Towards the connected automated vehicle highway technologies in challenging environments of China","author":"Yongqiang","year":"2021","journal-title":"arXiv:2105.14370"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02065"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811699"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IV51971.2022.9827401"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-26348-4_29"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812038"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_7"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01318"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00531"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28370"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC57777.2023.10422289"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IV55152.2023.10186693"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02109"},{"key":"ref45","article-title":"Multi-V2X: A large scale multi-modal multi-penetration-rate dataset for cooperative perception","author":"Li","year":"2024","journal-title":"arXiv:2409.04980"},{"key":"ref46","article-title":"V2X-real: A large-scale dataset for vehicle-to-everything cooperative perception","author":"Xiang","year":"2024","journal-title":"arXiv:2403.16034"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02139"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.02089"},{"key":"ref49","article-title":"V2X-radar: A multi-modal dataset with 4D radar for cooperative perception","author":"Yang","year":"2024","journal-title":"arXiv:2411.10962"},{"key":"ref50","article-title":"Mixed signals: A diverse point cloud dataset for heterogeneous LiDAR V2X collaboration","author":"Luo","year":"2025","journal-title":"arXiv:2502.14156"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-025-06255-3"},{"key":"ref52","first-page":"1","article-title":"CARLA: An open urban driving simulator","volume-title":"Proc. 1st Annu. Conf. Robot Learn.","author":"Dosovitskiy"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67361-5_40"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968102"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC45102.2020.9294422"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00140"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01712"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i9.33040"},{"key":"ref59","article-title":"NuPlan: A closed-loop ML-based planning benchmark for autonomous vehicles","author":"Caesar","year":"2021","journal-title":"arXiv:2106.11810"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0025"},{"key":"ref61","article-title":"DriveArena: A closed-loop generative simulation platform for autonomous driving","author":"Yang","year":"2024","journal-title":"arXiv:2408.00415"},{"key":"ref62","volume-title":"MMDetection3D: OpenMMLab Next-Generation Platform for General 3D Object Detection","year":"2020"},{"issue":"1","key":"ref63","first-page":"105","article-title":"PaddlePaddle: An open-source deep learning platform from industrial practice","volume":"1","author":"Ma","year":"2019","journal-title":"Frontiers Data Domputing"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73195-2_4"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20080-9_3"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3479222"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01830"},{"key":"ref68","article-title":"BEVDet: High-performance multi-camera 3D object detection in bird-eye-view","author":"Huang","year":"2021","journal-title":"arXiv:2112.11790"},{"key":"ref69","article-title":"PETRv2: A unified framework for 3D perception from multi-camera images","author":"Liu","year":"2022","journal-title":"arXiv:2206.01256"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25234"},{"key":"ref71","article-title":"BEVFormer v2: Adapting modern image backbones to bird\u2019s-eye-view recognition via perspective supervision","author":"Yang","year":"2022","journal-title":"arXiv:2211.10439"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00335"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72907-2_17"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73347-5_9"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72967-6_12"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW67362.2025.00236"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i9.33080"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02076"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01296"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2590"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0435"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00832"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00139"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01467"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_12"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3025077"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"ref90","article-title":"BEVDet4D: Exploit temporal cues in multi-camera 3D object detection","author":"Huang","year":"2022","journal-title":"arXiv:2203.17054"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.236"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_1"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_31"},{"key":"ref94","article-title":"MatrixVT: Efficient multi-camera to BEV transformation for 3D perception","author":"Zhou","year":"2022","journal-title":"arXiv:2211.10593"},{"key":"ref95","article-title":"PointNet: Deep learning on point sets for 3D classification and segmentation","author":"Qi","year":"2016","journal-title":"arXiv:1612.00593"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1706.02413"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00086"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00472"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.3390\/s18103337"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01298"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01685"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73001-6_5"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01437"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01105"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00798"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32248-9_23"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.89"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00315"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00738"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2025.3633725"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19839-7_29"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01413"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01170"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196884"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00058"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2021.3061270"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1177\/02783649211068535"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.23919\/FUSION49465.2021.9627037"},{"key":"ref122","article-title":"AiMotive dataset: A multimodal dataset for robust autonomous driving with long-range perception","author":"Matuszka","year":"2022","journal-title":"arXiv:2211.09445"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1177\/02783649231160195"},{"key":"ref124","first-page":"129","article-title":"Automotive radar dataset for deep learning based 3D object detection","volume-title":"Proc. 16th Eur. Radar Conf. (EuRAD)","author":"Meyer"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01651"},{"key":"ref126","first-page":"3819","article-title":"K-radar: 4D radar object detection for autonomous driving in various weather conditions","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Paek"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3147324"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC55140.2022.9922539"},{"key":"ref129","article-title":"Dual radar: A multi-modal dataset with dual 4D radar for autonomous driving","author":"Zhang","year":"2023","journal-title":"arXiv:2310.07602"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3415772"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3307005"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC57777.2023.10422606"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/TAES.1983.309350"},{"key":"ref134","article-title":"ADCNet: Learning from raw radar data via distillation","author":"Yang","year":"2023","journal-title":"arXiv:2303.11420"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25270"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00157"},{"key":"ref137","first-page":"1","article-title":"Echoes beyond points: Unleashing the power of raw radar data in multi-modality fusion","volume-title":"Proc. NeurIPS","author":"Liu"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2022.3219643"},{"key":"ref139","first-page":"493","article-title":"MVFAN: Multi-view feature assisted network for 4D radar object detection","volume-title":"Proc. Int. Conf. Neural Inf. Process.","author":"Qiao"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9564754"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.3390\/s21113854"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3322729"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00026"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2007.364024"},{"key":"ref145","article-title":"Fisheye camera and ultrasonic sensor fusion for near-field obstacle perception in bird\u2019s-eye-view","author":"Das","year":"2024","journal-title":"arXiv:2402.00637"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/LSENS.2023.3345170"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1088\/1757-899X\/571\/1\/012114"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-32-9244-4_1"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2021.3130202"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3268063"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.649"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/TCI.2024.3369398"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00466"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58583-9_43"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9564951"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01162"},{"key":"ref157","first-page":"16494","article-title":"Multimodal virtual point 3D detection","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst. (NIPS)","author":"Yin"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00022"},{"key":"ref159","first-page":"10421","article-title":"BEVFusion: A simple and robust LiDAR-camera fusion framework","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liang"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00116"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02073"},{"key":"ref163","first-page":"1992","article-title":"DeepInteraction: 3D object detection via modality interaction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yang"},{"key":"ref164","first-page":"18442","article-title":"Unifying voxel-based representation with transformer for 3D object detection","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00119"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/116"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20074-8_36"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00625"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01613"},{"key":"ref170","article-title":"BEVFusion4D: Learning LiDAR-camera fusion under bird\u2019s-eye-view via cross-modality guidance and temporal aggregation","author":"Cai","year":"2023","journal-title":"arXiv:2303.17099"},{"key":"ref171","article-title":"MmFUSION: Multimodal fusion for 3D objects detection","author":"Ahmad","year":"2023","journal-title":"arXiv:2311.04058"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2020.103955"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01675"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00311"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3306361"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73347-5_20"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00029"},{"key":"ref178","first-page":"1","article-title":"RoboFusion: Towards robust multi-modal 3D object detection via SAM","volume-title":"Proc. IJCAI","author":"Song"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01412"},{"key":"ref180","article-title":"MambaFusion: Height-fidelity dense global fusion for multi-modal 3D object detection","author":"Wang","year":"2025","journal-title":"arXiv:2507.04369"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1109\/tits.2025.3589581"},{"key":"ref182","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW67362.2025.00363"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794195"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01667"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341791"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00380"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.691"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1109\/dicta63115.2024.00064"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73242-3_7"},{"key":"ref190","article-title":"BEVDistill: Cross-modal BEV distillation for multi-view 3D object detection","author":"Chen","year":"2022","journal-title":"arXiv:2211.09386"},{"key":"ref191","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01656"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1109\/PRML59573.2023.10348338"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3414835"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01282"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01140"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01615"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161329"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3331972"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3059674"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802147"},{"key":"ref201","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9564904"},{"key":"ref202","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3240287"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2023.3280525"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1109\/IV55156.2024.10588781"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3329708"},{"key":"ref206","article-title":"Robust 3D object detection from LiDAR-radar point clouds via cross-modal feature augmentation","author":"Deng","year":"2023","journal-title":"arXiv:2309.17336"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25198"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3328953"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1186\/s13634-022-00944-6"},{"key":"ref210","article-title":"HVDetFusion: A simple and robust camera-radar fusion framework","author":"Lei","year":"2023","journal-title":"arXiv:2307.11323"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341793"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611449"},{"key":"ref213","article-title":"RCBEVDet++: Toward high-accuracy radar-camera fusion 3D perception network","author":"Lin","year":"2024","journal-title":"arXiv:2409.04979"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01465"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01414"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01592"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58523-5_29"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.3390\/app11125598"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01287"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1109\/IV47402.2020.9304655"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2022.3154980"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3193465"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3321240"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i3.32328"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2025.3554313"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2025.3536840"},{"key":"ref227","article-title":"CVFusion: Cross-view fusion of 4D radar and camera for 3D object detection","author":"Zhong","year":"2025","journal-title":"arXiv:2507.04587"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.3390\/rs15184433"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00051"},{"key":"ref230","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00099"},{"key":"ref231","doi-asserted-by":"publisher","DOI":"10.1109\/RTCSA62462.2024.00014"},{"key":"ref232","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9982123"},{"key":"ref233","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2022.3230265"},{"key":"ref234","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610775"},{"key":"ref235","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-91767-7_13"},{"key":"ref236","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10887748"},{"key":"ref237","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i4.32397"},{"key":"ref238","article-title":"MoRAL: Motion-aware multi-frame 4D radar and LiDAR fusion for robust 3D object detection","author":"Peng","year":"2025","journal-title":"arXiv:2505.09422"},{"key":"ref239","article-title":"CenterRadarNet: Joint 3D object detection and tracking framework using 4D FMCW radar","author":"Cheng","year":"2023","journal-title":"arXiv:2311.01423"},{"key":"ref240","doi-asserted-by":"publisher","DOI":"10.3390\/s19204357"},{"key":"ref241","doi-asserted-by":"publisher","DOI":"10.1109\/ICCPS54341.2022.00013"},{"key":"ref242","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02103"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981778"},{"key":"ref244","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160831"},{"key":"ref245","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00901"},{"key":"ref246","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2019.00058"},{"key":"ref247","doi-asserted-by":"publisher","DOI":"10.1145\/3318216.3363300"},{"key":"ref248","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00416"},{"key":"ref249","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_36"},{"key":"ref250","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3028424"},{"key":"ref251","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3053184"},{"key":"ref252","first-page":"29541","article-title":"Learning distilled collaboration graph for multi-agent perception","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref253","first-page":"1","article-title":"Flow-based feature fusion for vehicle-infrastructure cooperative 3D object detection","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref254","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3143299"},{"key":"ref255","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3310580"},{"key":"ref256","article-title":"CoBEVT: Cooperative bird\u2019s eye view semantic segmentation with sparse transformers","author":"Xu","year":"2022","journal-title":"arXiv:2207.02202"},{"key":"ref257","first-page":"4874","article-title":"Where2comm: Communication-efficient collaborative perception via spatial confidence maps","volume-title":"Proc. 36th Conf. Neural Inf. Process. Syst.","author":"Hu"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3339399"},{"key":"ref259","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC57777.2023.10421963"},{"key":"ref260","doi-asserted-by":"publisher","DOI":"10.1109\/WCMEIM56910.2022.10021459"},{"key":"ref261","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00124"},{"key":"ref262","article-title":"VIMI: Vehicle-infrastructure multi-view intermediate fusion for camera-based 3D object detection","author":"Wang","year":"2023","journal-title":"arXiv:2303.10975"},{"key":"ref263","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00892"},{"key":"ref264","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3282567"},{"key":"ref265","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00800"},{"key":"ref266","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00033"},{"key":"ref267","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00334"},{"key":"ref268","article-title":"An extensible framework for open heterogeneous collaborative perception","author":"Lu","year":"2024","journal-title":"arXiv:2401.13964"},{"key":"ref269","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3314919"},{"key":"ref270","doi-asserted-by":"publisher","DOI":"10.3390\/s24134101"},{"key":"ref271","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3368404"},{"key":"ref272","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2025.3560738"},{"key":"ref273","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11128057"},{"key":"ref274","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2025.3548801"},{"key":"ref275","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM55648.2025.11044682"},{"key":"ref276","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02077"},{"key":"ref277","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110939"},{"key":"ref278","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2025.3531145"},{"key":"ref279","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2018.8500388"},{"key":"ref280","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73021-4_8"},{"key":"ref281","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2023.3322764"},{"key":"ref282","doi-asserted-by":"publisher","DOI":"10.1109\/IV55156.2024.10588608"},{"key":"ref283","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3310674"},{"key":"ref284","article-title":"CoopTrack: Exploring end-to-end learning for efficient cooperative sequential perception","author":"Zhong","year":"2025","journal-title":"arXiv:2507.19239"},{"key":"ref285","doi-asserted-by":"publisher","DOI":"10.1109\/ICII.2019.00055"},{"key":"ref286","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160546"},{"key":"ref287","doi-asserted-by":"publisher","DOI":"10.1109\/CCNC51644.2023.10060139"},{"key":"ref288","doi-asserted-by":"publisher","DOI":"10.1109\/IV55152.2023.10186723"},{"key":"ref289","doi-asserted-by":"publisher","DOI":"10.1109\/AIoTSys58602.2023.00058"},{"key":"ref290","doi-asserted-by":"publisher","DOI":"10.1109\/ICIVC58118.2023.10269906"},{"key":"ref291","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC55140.2022.9921947"},{"key":"ref292","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3272027"},{"key":"ref293","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02070"},{"key":"ref294","article-title":"BEVHeight++: Toward robust visual centric 3D object detection","author":"Yang","year":"2023","journal-title":"arXiv:2309.16179"},{"key":"ref295","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610214"},{"key":"ref296","doi-asserted-by":"publisher","DOI":"10.23919\/JCC.2021.07.001"},{"key":"ref297","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2019.8813886"},{"key":"ref298","first-page":"1022","article-title":"BM2CP: Efficient collaborative perception with LiDAR-camera modalities","volume-title":"Proc. Conf. Robot Learn.","author":"Zhao"},{"key":"ref299","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3293954"},{"key":"ref300","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3387700"},{"key":"ref301","article-title":"Optimizing LiDAR placements for robust driving perception in adverse conditions","author":"Li","journal-title":"arXiv:2403.17009"},{"key":"ref302","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161027"},{"key":"ref303","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01685"},{"key":"ref304","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2022.104629"},{"key":"ref305","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160871"},{"key":"ref306","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611948"},{"key":"ref307","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3374168"},{"key":"ref308","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00139"},{"key":"ref309","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC58415.2024.10920140"},{"key":"ref310","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197364"},{"key":"ref311","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01466"},{"key":"ref312","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-97-3682-9_30"},{"key":"ref313","article-title":"AutoCast: Scalable infrastructure-less cooperative perception for distributed collaborative driving","author":"Qiu","year":"2021","journal-title":"arXiv:2112.14947"},{"key":"ref314","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2022.3153346"},{"key":"ref315","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2021.3126650"},{"key":"ref316","doi-asserted-by":"publisher","DOI":"10.1109\/TNSE.2021.3103124"},{"key":"ref317","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02079"},{"key":"ref318","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00620"},{"key":"ref319","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3392844"},{"key":"ref320","doi-asserted-by":"publisher","DOI":"10.1109\/TR.2022.3159664"},{"key":"ref321","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161384"},{"key":"ref322","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2023.3341122"},{"key":"ref323","doi-asserted-by":"publisher","DOI":"10.1109\/IV55156.2024.10588664"},{"key":"ref324","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00801"},{"key":"ref325","doi-asserted-by":"publisher","DOI":"10.1109\/IV55156.2024.10588783"},{"key":"ref326","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3407408"},{"key":"ref327","article-title":"RoboBEV: Towards robust bird\u2019s eye view perception under corruptions","author":"Xie","year":"2023","journal-title":"arXiv:2304.06719"},{"key":"ref328","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3260040"},{"key":"ref329","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2024.3371974"},{"key":"ref330","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_19"},{"key":"ref331","doi-asserted-by":"publisher","DOI":"10.1109\/IV55152.2023.10186727"},{"key":"ref332","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3266247"},{"key":"ref333","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2023.3324439"},{"key":"ref334","doi-asserted-by":"publisher","DOI":"10.1016\/j.vehcom.2021.100428"},{"key":"ref335","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00501"},{"key":"ref336","doi-asserted-by":"publisher","DOI":"10.1145\/3558052"},{"key":"ref337","article-title":"GaussianObject: Just taking four images to get a high-quality 3D object with Gaussian splatting","author":"Yang","year":"2024","journal-title":"arXiv:2402.10259"},{"key":"ref338","doi-asserted-by":"publisher","DOI":"10.1109\/tiv.2024.3398357"},{"key":"ref339","article-title":"DeepSeek-r1: Incentivizing reasoning capability in LLMs via reinforcement learning","author":"Guo","year":"2025","journal-title":"arXiv:2501.12948"},{"key":"ref340","article-title":"OpenAI o1 system card","author":"Jaech","year":"2024","journal-title":"arXiv:2412.16720"},{"key":"ref341","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28253"},{"key":"ref342","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611485"},{"key":"ref343","article-title":"BEV-LLM: Leveraging multimodal BEV maps for scene captioning in autonomous driving","author":"Brandstaetter","year":"2025","journal-title":"arXiv:2507.19370"},{"key":"ref344","article-title":"NuPlanQA: A large-scale dataset and benchmark for multi-view driving scene understanding in multi-modal large language models","author":"Park","year":"2025","journal-title":"arXiv:2503.12772"},{"key":"ref345","first-page":"1","article-title":"HERMES: A unified self-driving world model for simultaneous 3D scene understanding and generation","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Zhou"},{"key":"ref346","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73650-6_6"},{"key":"ref347","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11128800"},{"key":"ref348","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01124"},{"key":"ref349","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00766"},{"key":"ref350","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02105"},{"key":"ref351","doi-asserted-by":"publisher","DOI":"10.1109\/IROS60139.2025.11247237"},{"key":"ref352","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3307589"},{"key":"ref353","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2024.3361093"},{"key":"ref354","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3200245"},{"key":"ref355","first-page":"1","article-title":"Unraveling the effects of synthetic data on end-to-end autonomous driving","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Ge"},{"key":"ref356","first-page":"1","article-title":"ORION: A holistic end-to-end autonomous driving framework by vision-language instructed action generation","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Fu"},{"key":"ref357","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11128829"},{"key":"ref358","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i9.33040"},{"issue":"1","key":"ref359","first-page":"1","article-title":"A path towards autonomous machine intelligence version 0.9. 2, 2022-06-27","volume":"62","author":"LeCun","year":"2022","journal-title":"Open Rev."},{"key":"ref360","article-title":"GAIA-1: A generative world model for autonomous driving","author":"Hu","year":"2023","journal-title":"arXiv:2309.17080"},{"key":"ref361","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3368234"},{"key":"ref362","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01397"},{"key":"ref363","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01122"},{"key":"ref364","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72652-1_11"},{"key":"ref365","article-title":"BEVWorld: A multimodal world simulator for autonomous driving via scene-level BEV latents","author":"Zhang","year":"2024","journal-title":"arXiv:2407.05679"},{"key":"ref366","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73229-4_27"},{"key":"ref367","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28546"},{"key":"ref368","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00635"},{"key":"ref369","first-page":"1","article-title":"End-to-end driving with online trajectory evaluation via bev world model","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Li"},{"key":"ref370","article-title":"ReSim: Reliable world simulation for autonomous driving","author":"Yang","year":"2025","journal-title":"arXiv:2506.09981"},{"key":"ref371","first-page":"1","article-title":"Epona: Autoregressive diffusion world model for autonomous driving","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Zhang"},{"key":"ref372","first-page":"1","article-title":"World4Drive: End-to-end autonomous driving via intention-aware physical latent world model","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Zheng"},{"key":"ref373","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3400227"},{"key":"ref374","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33130"},{"key":"ref375","doi-asserted-by":"publisher","DOI":"10.1109\/TSP55681.2022.9851276"},{"key":"ref376","doi-asserted-by":"publisher","DOI":"10.5220\/0009826400680080"},{"key":"ref377","article-title":"UniVision: A unified framework for vision-centric 3D perception","author":"Hong","year":"2024","journal-title":"arXiv:2401.06994"}],"container-title":["IEEE Transactions on Intelligent Transportation Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6979\/11322649\/11223966.pdf?arnumber=11223966","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T18:17:49Z","timestamp":1767377869000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11223966\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":377,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tits.2025.3624830","relation":{},"ISSN":["1524-9050","1558-0016"],"issn-type":[{"value":"1524-9050","type":"print"},{"value":"1558-0016","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]}}}