{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T03:59:25Z","timestamp":1771646365878,"version":"3.50.1"},"reference-count":122,"publisher":"Tech Science Press","issue":"3","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.063205","type":"journal-article","created":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T05:08:41Z","timestamp":1743484121000},"page":"3877-3917","source":"Crossref","is-referenced-by-count":1,"title":["Research Progress on Multi-Modal Fusion Object Detection Algorithms for Autonomous Driving: A Review"],"prefix":"10.32604","volume":"83","author":[{"given":"Peicheng","family":"Shi","sequence":"first","affiliation":[]},{"given":"Li","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Xinlong","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Heng","family":"Qi","sequence":"additional","affiliation":[]},{"given":"Aixi","family":"Yang","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","doi-asserted-by":"crossref","first-page":"101603","DOI":"10.1109\/ACCESS.2024.3431437","article-title":"Explainable artificial intelligence for autonomous driving: a comprehensive overview and field guide for future research directions","volume":"12","author":"Atakishiyev","year":"2024","journal-title":"IEEE Access"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"2122","DOI":"10.1007\/s11263-023-01784-z","article-title":"Multi-modal 3D object detection in autonomous driving: a survey","volume":"131","author":"Wang","year":"2023","journal-title":"Int J Comput Vis"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"12163","DOI":"10.1109\/JSEN.2022.3175192","article-title":"TSF: two-stage sequential fusion for 3D object detection","volume":"22","author":"Qi","year":"2022","journal-title":"IEEE Sens J"},{"key":"ref4","series-title":"2017 36th Chinese Control Conference (CCC)","first-page":"11104","article-title":"A review of object detection based on convolutional neural network","author":"Wang","year":"2017"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"107021","DOI":"10.1016\/j.engappai.2023.107021","article-title":"Transformer for object detection: review and benchmark","volume":"126","author":"Li","year":"2023","journal-title":"Eng Appl Artif Intell"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"3212","DOI":"10.1109\/TNNLS.2018.2876865","article-title":"Object detection with deep learning: a review","volume":"30","author":"Zhao","year":"2019","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"ref7","series-title":"2012 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3354","article-title":"Are we ready for autonomous driving? The KITTI vision benchmark suite","author":"Geiger","year":"2012"},{"key":"ref8","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"11618","article-title":"nuScenes: a multimodal dataset for autonomous driving","author":"Caesar","year":"2020"},{"key":"ref9","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"2443","article-title":"Scalability in perception for autonomous driving: waymo open dataset","author":"Sun","year":"2020"},{"key":"ref10","unstructured":"Mao J, Niu M, Jiang C, Liang H, Chen J, Liang X, et al. One million scenes for autonomous driving: once dataset. arXiv:2106.11037. 2021."},{"key":"ref11","series-title":"Computer vision\u2013ECCV 2022","first-page":"550","article-title":"PersFormer: 3D lane detection via perspective transformer and the OpenLane benchmark","author":"Chen","year":"2022"},{"key":"ref12","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence; 2024 Feb 20\u201327","first-page":"5599","author":"Wang","year":"2024","journal-title":"A motion and accident prediction benchmark for V2X autonomous driving"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"2702","DOI":"10.1109\/TPAMI.2019.2926463","article-title":"The ApolloScape open dataset for autonomous driving and its application","volume":"42","author":"Huang","year":"2020","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref14","unstructured":"Houston J, Zuidhof G, Bergamini L, Ye Y, Chen L, Jain A, et al. One thousand and one hours: self-driving motion prediction dataset. arXiv:2006.14480. 2020."},{"key":"ref15","series-title":"2020 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"2267","article-title":"A*3D dataset: towards autonomous driving in challenging environments","author":"Pham"},{"key":"ref16","series-title":"2019 International Conference on Robotics and Automation (ICRA)","first-page":"9552","article-title":"The H3D dataset for full-surround 3D multi-object detection and tracking in crowded urban scenes","author":"Patil"},{"key":"ref17","unstructured":"Geyer J, Kassahun Y, Mahmudi M, Ricou X, Durgesh R, Chung AS, et al. A2D2: audi autonomous driving dataset. arXiv:2004.06320. 2020."},{"key":"ref18","unstructured":"G\u00e4hlert N, Jourdan N, Cordts M, Franke U, Denzler J. Cityscapes 3D: dataset and benchmark for 9 DoF vehicle detection. arXiv:2006.07864. 2020."},{"key":"ref19","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"Argoverse: 3D tracking and forecasting with rich maps","author":"Chang","year":"2019"},{"key":"ref20","unstructured":"Weng X, Man Y, Cheng D, Yuan Y, O\u2019Toole M, Kitani K. All-in-one drive: a large-scale comprehensive perception dataset with high-density long-range point clouds; 2020. doi:10.13140\/RG.2.2.21621.81122."},{"key":"ref21","doi-asserted-by":"crossref","first-page":"3292","DOI":"10.1109\/TPAMI.2022.3179507","article-title":"KITTI-360: a novel dataset and benchmarks for urban scene understanding in 2D and 3D","volume":"45","author":"Liao","year":"2023","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref22","series-title":"Proceedings of the 2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"6569","article-title":"CenterNet: keypoint triplets for object detection","author":"Duan","year":"2019"},{"key":"ref23","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"9287","article-title":"M3D-RPN: monocular 3D region proposal network for object detection","author":"Brazil","year":"2019"},{"key":"ref24","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"2064","article-title":"ROI-10D: monocular lifting of 2D detection to 6D pose and metric shape","author":"Manhardt","year":"2019"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1016\/j.neucom.2018.03.030","article-title":"Face detection using deep learning: an improved faster RCNN approach","volume":"299","author":"Sun","year":"2018","journal-title":"Neurocomputing"},{"key":"ref26","series-title":"2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"2147","article-title":"Monocular 3D object detection for autonomous driving","author":"Chen","year":"2016"},{"key":"ref27","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"1827","article-title":"Deep MANTA: a coarse-to-fine many-task network for joint 2D and 3D vehicle analysis from monocular image","author":"Chabot","year":"2017"},{"key":"ref28","first-page":"8409","article-title":"Mono3D++: monocular 3D vehicle detection with two-scale 3D hypotheses and task priors","volume":"33","author":"He","year":"2019","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref29","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3559","article-title":"3D-RCNN: instance-level 3D object reconstruction via render-and-compare","author":"Kundu","year":"2018"},{"key":"ref30","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"10545","article-title":"Disp R-CNN: stereo 3D object detection via shape prior guided instance disparity estimation","author":"Sun","year":"2020"},{"key":"ref31","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"7636","article-title":"Stereo R-CNN based 3D object detection for autonomous driving","author":"Li","year":"2019"},{"key":"ref32","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"7607","article-title":"Triangulation learning network: from monocular to stereo 3D object detection","author":"Qin"},{"key":"ref33","unstructured":"Lin X, Pei Z, Lin T, Huang L, Su Z. Sparse4D v3: advancing end-to-end 3D detection and tracking. arXiv:2311.11722. 2023."},{"key":"ref34","first-page":"2561","article-title":"Far3D: expanding the horizon for surround-view 3D object detection","volume":"38","author":"Jiang","year":"2024","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref35","unstructured":"Li B, Zhang T, Xia T. Vehicle detection from 3D lidar using fully convolutional network. arXiv:1608.07916. 2016."},{"key":"ref36","series-title":"2018 21st International Conference on Intelligent Transportation Systems (ITSC)","first-page":"3517","article-title":"BirdNet: a 3D object detection framework from LiDAR information","author":"Beltr\u00e1n","year":"2018"},{"key":"ref37","series-title":"2020 IEEE 23rd International Conference on Intelligent Transportation Systems (ITSC)","first-page":"1","article-title":"BirdNet+: end-to-end 3D object detection in LiDAR bird\u2019s eye view","author":"Barrera","year":"2020"},{"key":"ref38","series-title":"Proceedings of the Conference on Robot Learning","first-page":"923","article-title":"End-to-end multi-view fusion for 3D object detection in LiDAR point clouds","author":"Zhou","year":"2020"},{"key":"ref39","series-title":"2023 IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"8360","article-title":"FocalFormer3D: focusing on hard instance for 3D object detection","author":"Chen","year":"2023"},{"key":"ref40","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4490","article-title":"VoxelNet: end-to-end learning for point cloud based 3D object detection","author":"Zhou","year":"2018"},{"key":"ref41","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"11870","article-title":"Structure aware single-stage 3D object detection from point cloud","author":"He","year":"2020"},{"key":"ref42","doi-asserted-by":"crossref","first-page":"5705212","DOI":"10.1109\/TGRS.2023.3328929","article-title":"Fully sparse transformer 3-D detector for LiDAR point cloud","volume":"61","author":"Zhang","year":"2023","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"ref43","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"770","article-title":"PointRCNN: 3d object proposal generation and detection from point cloud","author":"Shi"},{"key":"ref44","unstructured":"Qi CR, Yi L, Su H, Guibas LJ. PointNet++: deep hierarchical feature learning on point sets in a metric space. arXiv:1706.02413. 2017."},{"key":"ref45","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"11037","article-title":"3DSSD: point-based 3D single stage object detector","author":"Yang","year":"2020"},{"key":"ref46","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"12689","article-title":"PointPillars: fast encoders for object detection from point clouds","author":"Lang","year":"2019"},{"key":"ref47","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"77","article-title":"PointNet: deep learning on point sets for 3D classification and segmentation","author":"Charles","year":"2017"},{"key":"ref48","series-title":"Proceedings of the 16th European Conference on Computer Vision","first-page":"18","article-title":"Pillar-based object detection for autonomous driving","author":"Wang"},{"key":"ref49","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"1708","article-title":"Point-GNN: graph neural network for 3D object detection in a point cloud","author":"Shi","year":"2020"},{"key":"ref50","first-page":"870","article-title":"SVGA-net: sparse voxel-graph attention network for 3D object detection from point clouds","volume":"36","author":"He","year":"2022","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"110080","DOI":"10.1016\/j.knosys.2022.110080","article-title":"SAT-GCN: self-attention graph convolutional network-based 3D object detection for autonomous driving","volume":"259","author":"Wang","year":"2023","journal-title":"Knowl Based Syst"},{"key":"ref52","series-title":"IEEE International Conference on Image Processing (ICIP)","first-page":"61","article-title":"Shift R-CNN: deep monocular 3D object detection with closed-form geometric constraints","author":"Naiden","year":"2019"},{"key":"ref53","series-title":"Proceedings of the 8th International Conference on Learning Representations","article-title":"Pseudo- LiDAR++: accurate depth for 3D object detection in autonomous driving","author":"You"},{"key":"ref54","series-title":"2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"5776","article-title":"Confidence guided stereo 3D object detection with split depth estimation","author":"Li","year":"2020"},{"key":"ref55","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"918","article-title":"Frustum PointNets for 3D object detection from RGB-D data","author":"Qi","year":"2018"},{"key":"ref56","series-title":"2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"1742","article-title":"Frustum ConvNet: sliding Frustums to aggregate local point-wise features for amodal 3D object detection","author":"Wang","year":"2019"},{"key":"ref57","series-title":"2021 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW)","first-page":"2926","article-title":"Frustum-PointPillars: a multi-stage approach for 3D object detection using RGB camera and LiDAR","author":"Paigwar","year":"2021"},{"key":"ref58","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"6526","article-title":"Multi-view 3D object detection network for autonomous driving","author":"Chen","year":"2017"},{"key":"ref59","first-page":"12460","article-title":"PI-RCNN: an efficient multi-sensor 3D object detector with point-based attentive cont-conv fusion module","volume":"34","author":"Xie","year":"2020","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref60","series-title":"ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"1992","article-title":"Scanet: spatial-channel attention network for 3D object detection","author":"Lu","year":"2019"},{"key":"ref61","series-title":"2021 IEEE International Intelligent Transportation Systems Conference (ITSC)","first-page":"3047","article-title":"FusionPainting: multimodal fusion with adaptive attention for 3D object detection","author":"Xu"},{"key":"ref62","series-title":"Proceedings of the European Conference on Computer Vision (ECCV2018)","first-page":"670","article-title":"Graph R-CNN for scene graph generation","author":"Yang","year":"2018"},{"key":"ref63","doi-asserted-by":"crossref","first-page":"1839","DOI":"10.3390\/rs15071839","article-title":"FusionRCNN: lidar-camera fusion for two-stage 3D object detection","volume":"15","author":"Xu","year":"2023","journal-title":"Remote Sens"},{"key":"ref64","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"1080","article-title":"TransFusion: robust LiDAR-camera fusion for 3D object detection with transformers","author":"Bai","year":"2022"},{"key":"ref65","series-title":"Attention is all you need. In: Proceedings of the 31st Conference on Neural Information Processing Systems (NIPS 2017)","first-page":"6000","author":"Vaswanic","year":"2017"},{"key":"ref66","series-title":"2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"10386","article-title":"CLOCs: camera-LiDAR object candidates fusion for 3D object detection","author":"Pang","year":"2020"},{"key":"ref67","series-title":"2022 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","first-page":"3747","article-title":"Fast-CLOCs: fast camera-LiDAR object candidates fusion for 3D object detection","author":"Pang","year":"2022"},{"key":"ref68","series-title":"IEEE International Conference on Image Processing (ICIP)","first-page":"3896","article-title":"Multi-view frustum pointnet for object detection in autonomous driving","author":"Cao","year":"2019"},{"key":"ref69","doi-asserted-by":"crossref","first-page":"5130","DOI":"10.3390\/rs15215130","article-title":"ConCs-fusion: a context clustering-based radar and camera fusion for three-dimensional object detection","volume":"15","author":"He","year":"2023","journal-title":"Remote Sens"},{"key":"ref70","series-title":"2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"1","article-title":"Joint 3D proposal generation and object detection from view aggregation","author":"Ku","year":"2018"},{"key":"ref71","series-title":"2019 IEEE International Conference on Robotics and Biomimetics (ROBIO)","first-page":"2181","article-title":"A multi-sensor fusion based 2D-driven 3D object detection approach for large scene applications","author":"Liu","year":"2019"},{"key":"ref72","doi-asserted-by":"crossref","unstructured":"Chen Z, Li Z, Zhang S, Fang L, Jiang Q, Zhao F, et al. AutoAlign: pixel-instance feature aggregation for multi-modal 3D object detection. arXiv:2201.06493. 2022.","DOI":"10.24963\/ijcai.2022\/116"},{"key":"ref73","doi-asserted-by":"crossref","unstructured":"Chen Z, Li Z, Zhang S, Fang L, Jiang Q, Zhao F. AutoAlignV2: deformable feature aggregation for dynamic multi-modal 3D object detection. arXiv:2207.10316. 2022.","DOI":"10.1007\/978-3-031-20074-8_36"},{"key":"ref74","doi-asserted-by":"crossref","first-page":"20","DOI":"10.3390\/wevj15010020","article-title":"Emerging trends in autonomous vehicle perception: multimodal fusion for 3D object detection","volume":"15","author":"Alaba","year":"2024","journal-title":"World Electr Veh J"},{"key":"ref75","first-page":"5703114","article-title":"Multi-sem fusion: multimodal semantic fusion for 3-D object detection","volume":"62","author":"Xu","year":"2024","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"ref76","first-page":"18442","article-title":"Unifying voxel-based representation with transformer for 3d object detection","volume":"35","author":"Li","year":"2022","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref77","doi-asserted-by":"crossref","unstructured":"Song Z, Zhang G, Xie J, Liu L, Jia C, Xu S, et al. VoxelNextFusion: a simple, unified and effective voxel fusion framework for multi-modal 3D object detection. arXiv:2401.02702. 2024.","DOI":"10.1109\/TGRS.2023.3331893"},{"key":"ref78","doi-asserted-by":"crossref","first-page":"4718","DOI":"10.3390\/s24144718","article-title":"BAFusion: bidirectional attention fusion for 3D object detection based on LiDAR and camera","volume":"24","author":"Liu","year":"2024","journal-title":"Sensors"},{"key":"ref79","unstructured":"Deng J, Zhang S, Dayoub F, Ouyang W, Zhang Y, Reid I. PoIFusion: multi-modal 3D object detection via fusion at points of interest. arXiv:2403.09212. 2024."},{"key":"ref80","doi-asserted-by":"crossref","first-page":"8527","DOI":"10.1109\/TITS.2024.3392783","article-title":"VoPiFNet: voxel-pixel fusion network for multi-class 3D object detection","volume":"25","author":"Wang","year":"2024","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"ref81","doi-asserted-by":"crossref","first-page":"1348","DOI":"10.3390\/app14041348","article-title":"Multi-layer fusion 3D object detection via lidar point cloud and camera image","volume":"14","author":"Guo","year":"2024","journal-title":"Appl Sci"},{"key":"ref82","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J, et al. Deformable DETR: deformable transformers for end-to-end object detection. arXiv:2010.04159. 2020."},{"key":"ref83","series-title":"2023 IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"18222","article-title":"Cross modal transformer: towards fast and robust 3D object detection","author":"Yan","year":"2023"},{"key":"ref84","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"898","article-title":"CAT-det: contrastively augmented transformer for multimodal 3D object detection","author":"Zhang","year":"2022"},{"key":"ref85","doi-asserted-by":"crossref","first-page":"79","DOI":"10.34133\/cbsystems.0079","article-title":"Camera-radar fusion with modality interaction and radar Gaussian expansion for 3D object detection","volume":"5","author":"Liu","year":"2024","journal-title":"Cyborg Bionic Syst"},{"key":"ref86","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"5418","article-title":"Sparse fuse dense: towards high quality 3D detection with depth completion","author":"Wu","year":"2022"},{"key":"ref87","series-title":"2016 IEEE 19th International Conference on Intelligent Transportation Systems (ITSC)","first-page":"271","article-title":"Robust detection of non-motorized road users using deep learning on optical and LIDAR data","author":"Kim"},{"key":"ref88","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster R-CNN: towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2017","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref89","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1007\/978-3-030-58555-6_3","author":"Huang","year":"2020","journal-title":"Computer vision\u2013ECCV 2020"},{"key":"ref90","series-title":"2021 IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"3133","article-title":"LIGA-stereo: learning LiDAR geometry aware representations for stereo-based 3D detector","author":"Guo","year":"2021"},{"key":"ref91","doi-asserted-by":"crossref","first-page":"4158","DOI":"10.3390\/s24134158","article-title":"Snow-CLOCs: camera-LiDAR object candidate fusion for 3D object detection in snowy conditions","volume":"24","author":"Fan","year":"2024","journal-title":"Sensors"},{"key":"ref92","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"5672","article-title":"InceptionNeXt: when inception meets ConvNeXt","author":"Yu","year":"2024"},{"key":"ref93","author":"Jocher","year":"2022","journal-title":"ultralytics\/yolov5: v7. 0-yolov5 sota realtime instance segmentation"},{"key":"ref94","doi-asserted-by":"crossref","first-page":"3337","DOI":"10.3390\/s18103337","article-title":"SECOND: sparsely embedded convolutional detection","volume":"18","author":"Yan","year":"2018","journal-title":"Sensors"},{"key":"ref95","series-title":"2023 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"2774","article-title":"BEVFusion: multi-task multi-sensor fusion with unified bird\u2019s-eye view representation","author":"Liu","year":"2023"},{"key":"ref96","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"14905","article-title":"IS-fusion: instance-scene collaborative fusion for multimodal 3D object detection","author":"Yin","year":"2024"},{"key":"ref97","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"21209","article-title":"GAFusion: adaptive fusing LiDAR and camera with multiple guidance for 3D object detection","author":"Li","year":"2024"},{"key":"ref98","doi-asserted-by":"crossref","unstructured":"Zhao H, Guan R, Wu T, Man KL, Yu L, Yue Y. UniBEVFusion: unified radar-vision BEVFusion for 3D object detection. arXiv:2409.14751. 2024.","DOI":"10.1109\/ICRA55743.2025.11128067"},{"key":"ref99","series-title":"2024 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"18236","article-title":"RCM-fusion: radar-camera multi-level fusion for 3D object detection","author":"Kim","year":"2024"},{"key":"ref100","doi-asserted-by":"crossref","first-page":"101","DOI":"10.52396\/JUSTC-2023-0006","article-title":"BEV-radar: bidirectional radar-camera fusion for 3D object detection","volume":"54","author":"Zhao","year":"2024","journal-title":"Justc"},{"key":"ref101","doi-asserted-by":"crossref","first-page":"102551","DOI":"10.1016\/j.inffus.2024.102551","article-title":"Coarse to fine-based image-point cloud fusion network for 3D object detection","volume":"112","author":"Hao","year":"2024","journal-title":"Inf Fusion"},{"key":"ref102","series-title":"2021 IEEE Winter Conference on Applications of Computer Vision (WACV)","first-page":"1526","article-title":"CenterFusion: center-based radar and camera fusion for 3D object detection","author":"Nabati","year":"2021"},{"key":"ref103","doi-asserted-by":"crossref","first-page":"5811","DOI":"10.1007\/s11760-024-03273-3","article-title":"CenRadfusion: fusing image center detection and millimeter wave radar for 3D object detection","volume":"18","author":"Shi","year":"2024","journal-title":"Signal Image Video Process"},{"key":"ref104","doi-asserted-by":"crossref","first-page":"114","DOI":"10.3390\/fi16040114","article-title":"NeXtFusion: attention-based camera-radar fusion network for improved three-dimensional object detection and tracking","volume":"16","author":"Kalgaonkar","year":"2024","journal-title":"Future Internet"},{"key":"ref105","series-title":"2024 7th International Conference on Advanced Algorithms and Control Engineering (ICAACE)","first-page":"384","article-title":"MWRC3D: 3d object detection with millimeter-wave radar and camera fusion","author":"Wang","year":"2024"},{"key":"ref106","first-page":"1","article-title":"Temporal-enhanced radar and camera fusion for object detection","volume":"21","author":"Kong","year":"2025","journal-title":"ACM Trans Multimedia Comput Commun Appl"},{"key":"ref107","unstructured":"Lin Z, Liu Z, Wang Y, Zhang L, Zhu C. RCBEVDet++: toward high-accuracy radar-camera fusion 3D perception network. arXiv:2409.04979. 2024."},{"key":"ref108","unstructured":"Lei K, Chen Z, Jia S, Zhang X. HVDetFusion: a simple and robust camera-radar fusion framework. arXiv:2307.11323. 2023."},{"key":"ref109","doi-asserted-by":"crossref","first-page":"5598","DOI":"10.3390\/app11125598","article-title":"Radar voxel fusion for 3D object detection","volume":"11","author":"Nobis","year":"2021","journal-title":"Appl Sci"},{"key":"ref110","doi-asserted-by":"crossref","first-page":"6964","DOI":"10.1109\/JSEN.2022.3154980","article-title":"Camera, LiDAR, and radar sensor fusion based on Bayesian neural network (CLR-BNN)","volume":"22","author":"Ravindran","year":"2022","journal-title":"IEEE Sens J"},{"key":"ref111","doi-asserted-by":"crossref","first-page":"11182","DOI":"10.1109\/LRA.2022.3193465","article-title":"EZFusion: a close look at the integration of LiDAR, millimeter-wave radar, and camera for accurate 3D object detection and tracking","volume":"7","author":"Li","year":"2022","journal-title":"IEEE Robot Autom Lett"},{"key":"ref112","unstructured":"Hendy N, Sloan C, Tian F, Duan P, Charchut N, Xie Y, et al. FISHING net: future inference of semantic heatmaps in grids. arXiv:2006.09917. 2020."},{"key":"ref113","doi-asserted-by":"crossref","first-page":"32630","DOI":"10.1109\/JSEN.2024.3448428","article-title":"Data fusion of roadside camera, LiDAR, and millimeter-wave radar","volume":"24","author":"Liu","year":"2024","journal-title":"IEEE Sens J"},{"key":"ref114","unstructured":"Das A, Paul S, Scholz N, Malviya AK, Sistu G, Bhattacharya U, et al. Fisheye camera and ultrasonic sensor fusion for near-field obstacle perception in bird\u2019s-eye-view. arXiv:2402.00637. 2024."},{"key":"ref115","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"5987","article-title":"Aggregated residual transformations for deep neural networks","author":"Xie","year":"2017"},{"key":"ref116","doi-asserted-by":"crossref","unstructured":"Aniobi A. Sensor fusion for real-time object detection and spatial positioning in unmanned vehicles using YOLOv8 and ESP32-Cam; 2024. doi:10.20944\/preprints202411.0611.v1 .","DOI":"10.20944\/preprints202411.0611.v1"},{"key":"ref117","doi-asserted-by":"crossref","first-page":"1680","DOI":"10.3390\/make5040083","article-title":"A comprehensive review of YOLO architectures in computer vision: from YOLOv1 to YOLOv8 and YOLO-NAS","volume":"5","author":"Terven","year":"2023","journal-title":"Mach Learn Knowl Extr"},{"key":"ref118","doi-asserted-by":"crossref","first-page":"1761","DOI":"10.18280\/ijht.420530","article-title":"Enhancing target detection and recognition in advanced driver assistance systems using infrared thermal imaging and the YOLOv5 algorithm","volume":"42","author":"Zhong","year":"2024","journal-title":"Int J Heat Technol"},{"key":"ref119","series-title":"Proceedings of the Conference on Robot Learning","first-page":"146","article-title":"HDNET: exploiting hd maps for 3D object detection","author":"Yang","year":"2018"},{"key":"ref120","series-title":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"3406","article-title":"MapFusion: a general framework for 3D object detection with HDMaps","author":"Fang","year":"2021"},{"key":"ref121","series-title":"Proceedings of the Asian Conference on Computer Vision","first-page":"3291","article-title":"PointFormer: a dual perception attention-based network for point cloud classification","author":"Chen","year":"2022"},{"key":"ref122","first-page":"7464","article-title":"Graph transformer for graph-to-sequence learning","volume":"34","author":"Cai","year":"2020","journal-title":"Proc AAAI Conf Artif Intell"}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-83-3\/TSP_CMC_63205\/TSP_CMC_63205.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T01:28:32Z","timestamp":1763342912000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v83n3\/61036"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":122,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.063205","relation":{},"ISSN":["1546-2226"],"issn-type":[{"value":"1546-2226","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}