{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T10:30:20Z","timestamp":1771065020087,"version":"3.50.1"},"reference-count":60,"publisher":"MDPI AG","issue":"11","license":[{"start":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T00:00:00Z","timestamp":1731974400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Natural Science Foundation of China Project","award":["62172441"],"award-info":[{"award-number":["62172441"]}]},{"name":"National Natural Science Foundation of China Project","award":["62172449"],"award-info":[{"award-number":["62172449"]}]},{"name":"National Natural Science Foundation of China Project","award":["2021Szvup166"],"award-info":[{"award-number":["2021Szvup166"]}]},{"name":"National Natural Science Foundation of China Project","award":["GZSYS-KY-2022-018"],"award-info":[{"award-number":["GZSYS-KY-2022-018"]}]},{"name":"National Natural Science Foundation of China Project","award":["GZSYS-KY-2022-024"],"award-info":[{"award-number":["GZSYS-KY-2022-024"]}]},{"name":"National Natural Science Foundation of China Project","award":["202208183000751"],"award-info":[{"award-number":["202208183000751"]}]},{"name":"National Natural Science Foundation of China Project","award":["2023JJ30696"],"award-info":[{"award-number":["2023JJ30696"]}]},{"name":"Local Science and Technology Developing Foundation Guided by the Central Government of China","award":["62172441"],"award-info":[{"award-number":["62172441"]}]},{"name":"Local Science and Technology Developing Foundation Guided by the Central Government of China","award":["62172449"],"award-info":[{"award-number":["62172449"]}]},{"name":"Local Science and Technology Developing Foundation Guided by the Central Government of China","award":["2021Szvup166"],"award-info":[{"award-number":["2021Szvup166"]}]},{"name":"Local Science and Technology Developing Foundation Guided by the Central Government of China","award":["GZSYS-KY-2022-018"],"award-info":[{"award-number":["GZSYS-KY-2022-018"]}]},{"name":"Local Science and Technology Developing Foundation Guided by the Central Government of China","award":["GZSYS-KY-2022-024"],"award-info":[{"award-number":["GZSYS-KY-2022-024"]}]},{"name":"Local Science and Technology Developing Foundation Guided by the Central Government of China","award":["202208183000751"],"award-info":[{"award-number":["202208183000751"]}]},{"name":"Local Science and Technology Developing Foundation Guided by the Central Government of China","award":["2023JJ30696"],"award-info":[{"award-number":["2023JJ30696"]}]},{"name":"Opening Project of State Key Laboratory of Nickel and Cobalt Resources Comprehensive Utilization","award":["62172441"],"award-info":[{"award-number":["62172441"]}]},{"name":"Opening Project of State Key Laboratory of Nickel and Cobalt Resources Comprehensive Utilization","award":["62172449"],"award-info":[{"award-number":["62172449"]}]},{"name":"Opening Project of State Key Laboratory of Nickel and Cobalt Resources Comprehensive Utilization","award":["2021Szvup166"],"award-info":[{"award-number":["2021Szvup166"]}]},{"name":"Opening Project of State Key Laboratory of Nickel and Cobalt Resources Comprehensive Utilization","award":["GZSYS-KY-2022-018"],"award-info":[{"award-number":["GZSYS-KY-2022-018"]}]},{"name":"Opening Project of State Key Laboratory of Nickel and Cobalt Resources Comprehensive Utilization","award":["GZSYS-KY-2022-024"],"award-info":[{"award-number":["GZSYS-KY-2022-024"]}]},{"name":"Opening Project of State Key Laboratory of Nickel and Cobalt Resources Comprehensive Utilization","award":["202208183000751"],"award-info":[{"award-number":["202208183000751"]}]},{"name":"Opening Project of State Key Laboratory of Nickel and Cobalt Resources Comprehensive Utilization","award":["2023JJ30696"],"award-info":[{"award-number":["2023JJ30696"]}]},{"name":"Key Project of Shenzhen City Special Fund for Fundamental Research","award":["62172441"],"award-info":[{"award-number":["62172441"]}]},{"name":"Key Project of Shenzhen City Special Fund for Fundamental Research","award":["62172449"],"award-info":[{"award-number":["62172449"]}]},{"name":"Key Project of Shenzhen City Special Fund for Fundamental Research","award":["2021Szvup166"],"award-info":[{"award-number":["2021Szvup166"]}]},{"name":"Key Project of Shenzhen City Special Fund for Fundamental Research","award":["GZSYS-KY-2022-018"],"award-info":[{"award-number":["GZSYS-KY-2022-018"]}]},{"name":"Key Project of Shenzhen City Special Fund for Fundamental Research","award":["GZSYS-KY-2022-024"],"award-info":[{"award-number":["GZSYS-KY-2022-024"]}]},{"name":"Key Project of Shenzhen City Special Fund for Fundamental Research","award":["202208183000751"],"award-info":[{"award-number":["202208183000751"]}]},{"name":"Key Project of Shenzhen City Special Fund for Fundamental Research","award":["2023JJ30696"],"award-info":[{"award-number":["2023JJ30696"]}]},{"name":"National Natural Science Foundation of Hunan Province","award":["62172441"],"award-info":[{"award-number":["62172441"]}]},{"name":"National Natural Science Foundation of Hunan Province","award":["62172449"],"award-info":[{"award-number":["62172449"]}]},{"name":"National Natural Science Foundation of Hunan Province","award":["2021Szvup166"],"award-info":[{"award-number":["2021Szvup166"]}]},{"name":"National Natural Science Foundation of Hunan Province","award":["GZSYS-KY-2022-018"],"award-info":[{"award-number":["GZSYS-KY-2022-018"]}]},{"name":"National Natural Science Foundation of Hunan Province","award":["GZSYS-KY-2022-024"],"award-info":[{"award-number":["GZSYS-KY-2022-024"]}]},{"name":"National Natural Science Foundation of Hunan Province","award":["202208183000751"],"award-info":[{"award-number":["202208183000751"]}]},{"name":"National Natural Science Foundation of Hunan Province","award":["2023JJ30696"],"award-info":[{"award-number":["2023JJ30696"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Information"],"abstract":"<jats:p>Accurate 3D object detection is essential for autonomous driving, yet traditional LiDAR models often struggle with sparse point clouds. We propose perspective-aware hierarchical vision transformer-based LiDAR-camera fusion (PLC-Fusion) for 3D object detection to address this. This efficient, multi-modal 3D object detection framework integrates LiDAR and camera data for improved performance. First, our method enhances LiDAR data by projecting them onto a 2D plane, enabling the extraction of object perspective features from a probability map via the Object Perspective Sampling (OPS) module. It incorporates a lightweight perspective detector, consisting of interconnected 2D and monocular 3D sub-networks, to extract image features and generate object perspective proposals by predicting and refining top-scored 3D candidates. Second, it leverages two independent transformers\u2014CamViT for 2D image features and LidViT for 3D point cloud features. These ViT-based representations are fused via the Cross-Fusion module for hierarchical and deep representation learning, improving performance and computational efficiency. These mechanisms enhance the utilization of semantic features in a region of interest (ROI) to obtain more representative point features, leading to a more effective fusion of information from both LiDAR and camera sources. PLC-Fusion outperforms existing methods, achieving a mean average precision (mAP) of 83.52% and 90.37% for 3D and BEV detection, respectively. Moreover, PLC-Fusion maintains a competitive inference time of 0.18 s. Our model addresses computational bottlenecks by eliminating the need for dense BEV searches and global attention mechanisms while improving detection range and precision.<\/jats:p>","DOI":"10.3390\/info15110739","type":"journal-article","created":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T11:58:07Z","timestamp":1732017487000},"page":"739","update-policy":"https:\/\/doi.org\/10.3390\/mdpi_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["PLC-Fusion: Perspective-Based Hierarchical and Deep LiDAR Camera Fusion for 3D Object Detection in Autonomous Vehicles"],"prefix":"10.3390","volume":"15","author":[{"given":"Husnain","family":"Mushtaq","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha 410083, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2740-8025","authenticated-orcid":false,"given":"Xiaoheng","family":"Deng","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha 410083, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fizza","family":"Azhar","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Chenab, Gujrat 50700, Pakistan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mubashir","family":"Ali","sequence":"additional","affiliation":[{"name":"School of Computer Science, University of Birmingham, Birmingham B15 2TT, UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hafiz Husnain","family":"Raza Sherazi","sequence":"additional","affiliation":[{"name":"School of Computing, Newcastle University, Newcastle Upon Tyne NE4 5TG, UK"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"1968","published-online":{"date-parts":[[2024,11,19]]},"reference":[{"key":"ref_1","doi-asserted-by":"crossref","first-page":"1524","DOI":"10.1109\/TIV.2023.3331972","article-title":"Sparsefusion3d: Sparse sensor fusion for 3d object detection by radar and camera in environmental perception","volume":"9","author":"Yu","year":"2023","journal-title":"IEEE Trans. Intell. Veh."},{"key":"ref_2","doi-asserted-by":"crossref","first-page":"5291","DOI":"10.1109\/TMM.2022.3189778","article-title":"Vpfnet: Improving 3d object detection with virtual point based lidar and stereo data fusion","volume":"25","author":"Zhu","year":"2022","journal-title":"IEEE Trans. Multimed."},{"key":"ref_3","doi-asserted-by":"crossref","first-page":"5707515","DOI":"10.1109\/TGRS.2024.3476072","article-title":"Channel-wise and spatially-guided Multimodal feature fusion network for 3D Object Detection in Autonomous Vehicles","volume":"62","author":"Uzair","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_4","doi-asserted-by":"crossref","first-page":"1242","DOI":"10.1109\/TETCI.2023.3259441","article-title":"3D object detection and tracking based on lidar-camera fusion and IMM-UKF algorithm towards highway driving","volume":"7","author":"Nie","year":"2023","journal-title":"IEEE Trans. Emerg. Top. Comput. Intell."},{"key":"ref_5","doi-asserted-by":"crossref","unstructured":"Yan, Y., Mao, Y., and Li, B. (2018). Second: Sparsely embedded convolutional detection. Sensors, 18.","DOI":"10.3390\/s18103337"},{"key":"ref_6","doi-asserted-by":"crossref","unstructured":"Chen, Q., Li, P., Xu, M., and Qi, X. (2021, January 19\u201325). Sparse Activation Maps for Interpreting 3D Object Detection. Proceedings of the 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), Nashville, TN, USA.","DOI":"10.1109\/CVPRW53098.2021.00017"},{"key":"ref_7","doi-asserted-by":"crossref","unstructured":"Chen, Y., Li, Y., Zhang, X., Sun, J., and Jia, J. (2022, January 18\u201324). Focal Sparse Convolutional Networks for 3D Object Detection. Proceedings of the 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, LA, USA.","DOI":"10.1109\/CVPR52688.2022.00535"},{"key":"ref_8","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., and Li, H. (2019, January 15\u201320). PointRCNN: 3D object proposal generation and detection from point cloud. Proceedings of the 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Long Beach, CA, USA.","DOI":"10.1109\/CVPR.2019.00086"},{"key":"ref_9","doi-asserted-by":"crossref","unstructured":"Mushtaq, H., Deng, X., Ullah, I., Ali, M., and Malik, B.H. (2024). O2SAT: Object-Oriented-Segmentation-Guided Spatial-Attention Network for 3D Object Detection in Autonomous Vehicles. Information, 15.","DOI":"10.3390\/info15070376"},{"key":"ref_10","doi-asserted-by":"crossref","unstructured":"Vora, S., Lang, A.H., Helou, B., and Beijbom, O. (2020, January 13\u201319). Pointpainting: Sequential fusion for 3D object detection. Proceedings of the 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Seattle, WA, USA.","DOI":"10.1109\/CVPR42600.2020.00466"},{"key":"ref_11","doi-asserted-by":"crossref","unstructured":"Xie, L., Xiang, C., Yu, Z., Xu, G., Yang, Z., Cai, D., and He, X. (2020, January 7\u201312). PI-RCNN: An efficient multi-sensor 3D object detector with point-based attentive cont-conv fusion module. Proceedings of the AAAI Conference on Artificial Intelligence, New York, NY, USA.","DOI":"10.1609\/aaai.v34i07.6933"},{"key":"ref_12","doi-asserted-by":"crossref","unstructured":"Wang, H., Tang, H., Shi, S., Li, A., Li, Z., Schiele, B., and Wang, L. (2023, January 11\u201315). Unitr: A unified and efficient multi-modal transformer for bird\u2019s-eye-view representation. Proceedings of the IEEE\/CVF International Conference on Computer Vision, Nashville, TN, USA.","DOI":"10.1109\/ICCV51070.2023.00625"},{"key":"ref_13","doi-asserted-by":"crossref","unstructured":"Yan, J., Liu, Y., Sun, J., Jia, F., Li, S., Wang, T., and Zhang, X. (2023, January 2\u20136). Cross modal transformer: Towards fast and robust 3d object detection. Proceedings of the IEEE\/CVF International Conference on Computer Vision, Paris, France.","DOI":"10.1109\/ICCV51070.2023.01675"},{"key":"ref_14","doi-asserted-by":"crossref","unstructured":"Bai, X., Hu, Z., Zhu, X., Huang, Q., Chen, Y., Fu, H., and Tai, C.L. (2022, January 18\u201324). Transfusion: Robust lidar-camera fusion for 3d object detection with transformers. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, LA, USA.","DOI":"10.1109\/CVPR52688.2022.00116"},{"key":"ref_15","doi-asserted-by":"crossref","first-page":"12878","DOI":"10.1109\/TPAMI.2022.3200245","article-title":"Transfuser: Imitation with transformer-based sensor fusion for autonomous driving","volume":"45","author":"Chitta","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"ref_16","doi-asserted-by":"crossref","unstructured":"Ku, J., Mozifian, M., Lee, J., Harakeh, A., and Waslander, S.L. (2018, January 1\u20135). Joint 3D Proposal Generation and Object Detection from View Aggregation. Proceedings of the 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), Madrid, Spain.","DOI":"10.1109\/IROS.2018.8594049"},{"key":"ref_17","doi-asserted-by":"crossref","unstructured":"Chen, X., Ma, H., Wan, J., Li, B., and Xia, T. (2017, January 21\u201326). Multi-view 3D object detection network for autonomous driving. Proceedings of the 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, USA.","DOI":"10.1109\/CVPR.2017.691"},{"key":"ref_18","doi-asserted-by":"crossref","unstructured":"Li, Y., Yu, A.W., Meng, T., Caine, B., Ngiam, J., Peng, D., Shen, J., Lu, Y., Zhou, D., and Le, Q.V. (2022, January 18-24). DeepFusion: Lidar-Camera Deep Fusion for Multi-Modal 3D Object Detection. Proceedings of the 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, LA, USA.","DOI":"10.1109\/CVPR52688.2022.01667"},{"key":"ref_19","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., and Urtasun, R. (2012, January 16\u201321). Are we ready for autonomous driving? the KITTI vision benchmark suite. Proceedings of the 2012 IEEE Conference on Computer Vision and Pattern Recognition, Providence, RI, USA.","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"ref_20","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W.L., Garg, D., Hariharan, B., Campbell, M., and Weinberger, K.Q. (2019, January 15\u201320). Pseudo-lidar from visual depth estimation: Bridging the gap in 3d object detection for autonomous driving. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Long Beach, CA, USA.","DOI":"10.1109\/CVPR.2019.00864"},{"key":"ref_21","doi-asserted-by":"crossref","unstructured":"Weng, X., and Kitani, K. (2019, January 27\u201328). Monocular 3d object detection with pseudo-lidar point cloud. Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops, Seoul, Republic of Korea.","DOI":"10.1109\/ICCVW.2019.00114"},{"key":"ref_22","unstructured":"You, Y., Wang, Y., Chao, W.L., Garg, D., Pleiss, G., Hariharan, B., Campbell, M., and Weinberger, K.Q. (2019). Pseudo-lidar++: Accurate depth for 3d object detection in autonomous driving. arXiv."},{"key":"ref_23","doi-asserted-by":"crossref","unstructured":"Rukhovich, D., Vorontsova, A., and Konushin, A. (2022, January 3\u20138). ImVoxelNet: Image to Voxels Projection for Monocular and Multi-View General-Purpose 3D Object Detection. Proceedings of the 2022 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), Waikoloa, HI, USA.","DOI":"10.1109\/WACV51458.2022.00133"},{"key":"ref_24","doi-asserted-by":"crossref","unstructured":"Ding, M., Huo, Y., Yi, H., Wang, Z., Shi, J., Lu, Z., and Luo, P. (2020, January 13\u201319). Learning depth-guided convolutions for monocular 3d object detection. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, Seattle, WA, USA.","DOI":"10.1109\/CVPRW50498.2020.00508"},{"key":"ref_25","doi-asserted-by":"crossref","unstructured":"Park, D., Ambru\u015f, R., Guizilini, V., Li, J., and Gaidon, A. (2021, January 10\u201317). Is Pseudo-Lidar needed for Monocular 3D Object detection?. Proceedings of the 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), Montreal, QC, Canada.","DOI":"10.1109\/ICCV48922.2021.00313"},{"key":"ref_26","doi-asserted-by":"crossref","unstructured":"Landrieu, L., and Simonovsky, M. (2018, January 18\u201323). Large-Scale Point Cloud Semantic Segmentation with Superpoint Graphs. Proceedings of the 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Salt Lake City, UT, USA.","DOI":"10.1109\/CVPR.2018.00479"},{"key":"ref_27","doi-asserted-by":"crossref","unstructured":"Zhou, Y., and Tuzel, O. (2018, January 18\u201323). VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection. Proceedings of the 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Salt Lake City, UT, USA.","DOI":"10.1109\/CVPR.2018.00472"},{"key":"ref_28","doi-asserted-by":"crossref","unstructured":"Lang, A.H., Vora, S., Caesar, H., Zhou, L., Yang, J., and Beijbom, O. (2019, January 15\u201320). Pointpillars: Fast encoders for object detection from point clouds. Proceedings of the 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Long Beach, CA, USA.","DOI":"10.1109\/CVPR.2019.01298"},{"key":"ref_29","doi-asserted-by":"crossref","unstructured":"Pan, X., Xia, Z., Song, S., Li, L.E., and Huang, G. (2021, January 20\u201325). 3D Object Detection with Pointformer. Proceedings of the 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Nashville, TN, USA.","DOI":"10.1109\/CVPR46437.2021.00738"},{"key":"ref_30","unstructured":"He, Q., Wang, Z., Zeng, H., Zeng, Y., and Liu, Y. (March, January 22). Svga-net: Sparse voxel-graph attention network for 3d object detection from point clouds. Proceedings of the AAAI Conference on Artificial Intelligence, Online."},{"key":"ref_31","doi-asserted-by":"crossref","first-page":"103295","DOI":"10.1016\/j.cviu.2021.103295","article-title":"Deep structural information fusion for 3D object detection on LiDAR\u2013camera system","volume":"214","author":"An","year":"2022","journal-title":"Comput. Vis. Image Underst."},{"key":"ref_32","doi-asserted-by":"crossref","unstructured":"Yoo, J.H., Kim, Y., Kim, J., and Choi, J.W. (2020). 3d-cvf: Generating joint camera and lidar features using cross-view spatial feature fusion for 3d object detection. Proceedings of the Computer Vision\u2014ECCV 2020: 16th European Conference, Glasgow, UK, 23\u201328 August 2020, Springer. Proceedings, Part XXVII 16.","DOI":"10.1007\/978-3-030-58583-9_43"},{"key":"ref_33","doi-asserted-by":"crossref","unstructured":"Huang, T., Liu, Z., Chen, X., and Bai, X. (2020). Epnet: Enhancing point features with image semantics for 3d object detection. Proceedings of the Computer Vision\u2014ECCV 2020: 16th European Conference, Glasgow, UK, 23\u201328 August 2020, Springer. Proceedings, Part XV 16.","DOI":"10.1007\/978-3-030-58555-6_3"},{"key":"ref_34","doi-asserted-by":"crossref","first-page":"105815","DOI":"10.1016\/j.engappai.2022.105815","article-title":"LiDAR-camera fusion: Dual transformer enhancement for 3D object detection","volume":"120","author":"Chen","year":"2023","journal-title":"Eng. Appl. Artif. Intell."},{"key":"ref_35","unstructured":"Hu, C., Zheng, H., Li, K., Xu, J., Mao, W., Luo, M., Wang, L., Chen, M., Liu, K., and Zhao, Y. (2023). FusionFormer: A multi-sensory fusion in bird\u2019s-eye-view and temporal consistent transformer for 3D object detection. arXiv."},{"key":"ref_36","doi-asserted-by":"crossref","unstructured":"Huang, J., Ye, Y., Liang, Z., Shan, Y., and Du, D. (2023). Detecting as labeling: Rethinking LiDAR-camera fusion in 3D object detection. arXiv.","DOI":"10.1007\/978-3-031-72670-5_25"},{"key":"ref_37","unstructured":"Cai, H., Zhang, Z., Zhou, Z., Li, Z., Ding, W., and Zhao, J. (2023). BEVFusion4D: Learning LiDAR-camera fusion under bird\u2019s-eye-view via cross-modality guidance and temporal aggregation. arXiv."},{"key":"ref_38","doi-asserted-by":"crossref","unstructured":"Khamsehashari, R., and Schill, K. (2021, January 9\u201311). Improving deep multi-modal 3D object detection for autonomous driving. Proceedings of the 2021 7th International Conference on Automation, Robotics and Applications (ICARA), Auckland, New Zealand.","DOI":"10.1109\/ICARA51699.2021.9376453"},{"key":"ref_39","doi-asserted-by":"crossref","unstructured":"Chen, Z., Li, Z., Zhang, S., Fang, L., Jiang, Q., and Zhao, F. (2022, January 23\u201327). Deformable feature aggregation for dynamic multi-modal 3D object detection. Proceedings of the European Conference on Computer Vision (ECCV), Tel Aviv, Israel.","DOI":"10.1007\/978-3-031-20074-8_36"},{"key":"ref_40","doi-asserted-by":"crossref","unstructured":"Liu, Z., Tang, H., Amini, A., Yang, X., Mao, H., Rus, D.L., and Han, S. (June, January 29). BEVFusion: Multi-task multi-sensor fusion with unified bird\u2019s-eye view representation. Proceedings of the 2023 IEEE International Conference on Robotics and Automation (ICRA), London, UK.","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"ref_41","doi-asserted-by":"crossref","unstructured":"Liu, X., Zhang, B., and Liu, N. (2023). The Graph Neural Network Detector Based on Neighbor Feature Alignment Mechanism in LIDAR Point Clouds. Machines, 11.","DOI":"10.3390\/machines11010116"},{"key":"ref_42","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Zhou, Y., and Tuzel, O. (2019, January 20\u201324). MVX-net: Multimodal VoxelNet for 3D object detection. Proceedings of the 2019 International Conference on Robotics and Automation (ICRA), Montreal, QC, Canada.","DOI":"10.1109\/ICRA.2019.8794195"},{"key":"ref_43","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1016\/j.neucom.2022.04.075","article-title":"MSL3D: 3D object detection from monocular, stereo and point cloud for autonomous driving","volume":"494","author":"Chen","year":"2022","journal-title":"Neurocomputing"},{"key":"ref_44","doi-asserted-by":"crossref","unstructured":"Zhu, M., Ma, C., Ji, P., and Yang, X. (2021, January 5\u20139). Cross-modality 3d object detection. Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, Online.","DOI":"10.1109\/WACV48630.2021.00382"},{"key":"ref_45","doi-asserted-by":"crossref","unstructured":"Wei, Z., Zhang, F., Chang, S., Liu, Y., Wu, H., and Feng, Z. (2022). MmWave Radar and Vision Fusion for Object Detection in Autonomous Driving: A Review. Sensors, 22.","DOI":"10.3390\/s22072542"},{"key":"ref_46","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., and Polosukhin, I. (2017, January 4\u20139). Attention is all you need. Proceedings of the Advances in Neural Information Processing Systems, Long Beach, CA, USA."},{"key":"ref_47","doi-asserted-by":"crossref","unstructured":"Xiang, P., Wen, X., Liu, Y.S., Cao, Y.P., Wan, P., Zheng, W., and Han, Z. (2021, January 10\u201317). SnowflakeNet: Point Cloud Completion by Snowflake Point Deconvolution with Skip-Transformer. Proceedings of the 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), Montreal, QC, Canada.","DOI":"10.1109\/ICCV48922.2021.00545"},{"key":"ref_48","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., and Zagoruyko, S. (2020). End-to-end object detection with transformers. Proceedings of the Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, 23\u201328 August 2020, Springer. Proceedings, Part I 16."},{"key":"ref_49","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., and Dai, J. (2020). Deformable detr: Deformable transformers for end-to-end object detection. arXiv."},{"key":"ref_50","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., and Gelly, S. (2021). An Image Is Worth 16X16 Words: Transformers for Image Recognition at Scale. arXiv."},{"key":"ref_51","doi-asserted-by":"crossref","unstructured":"Hua, B.S., Tran, M.K., and Yeung, S.K. (2018, January 18\u201323). Pointwise Convolutional Neural Networks. Proceedings of the 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Salt Lake City, UT, USA.","DOI":"10.1109\/CVPR.2018.00109"},{"key":"ref_52","doi-asserted-by":"crossref","unstructured":"Mushtaq, H., Deng, X., Ali, M., Hayat, B., and Raza Sherazi, H.H. (2023). DFA-SAT: Dynamic Feature Abstraction with Self-Attention-Based 3D Object Detection for Autonomous Driving. Sustainability, 15.","DOI":"10.3390\/su151813667"},{"key":"ref_53","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2024.3351286","article-title":"PointDifformer: Robust Point Cloud Registration With Neural Diffusion and Transformer","volume":"62","author":"She","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_54","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2024.3485590","article-title":"3DGTN: 3-D Dual-Attention GLocal Transformer Network for Point Cloud Classification and Segmentation","volume":"62","author":"Lu","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"ref_55","doi-asserted-by":"crossref","unstructured":"Fei, J., Chen, W., Heidenreich, P., Wirges, S., and Stiller, C. (2020, January 14\u201316). SemanticVoxels: Sequential Fusion for 3D Pedestrian Detection using LiDAR Point Cloud and Semantic Segmentation. Proceedings of the 2020 IEEE International Conference on Multisensor Fusion and Integration for Intelligent Systems (MFI), Karlsruhe, Germany.","DOI":"10.1109\/MFI49285.2020.9235240"},{"key":"ref_56","doi-asserted-by":"crossref","unstructured":"Mahmoud, A., and Waslander, S.L. (2021, January 26\u201328). Sequential Fusion via Bounding Box and Motion PointPainting for 3D Objection Detection. Proceedings of the 2021 18th Conference on Robots and Vision (CRV), Burnaby, BC, Canada.","DOI":"10.1109\/CRV52889.2021.00013"},{"key":"ref_57","doi-asserted-by":"crossref","unstructured":"Shi, S., Guo, C., Jiang, L., Wang, Z., Shi, J., Wang, X., and Li, H. (2020, January 13\u201319). PV-RCNN: Point-voxel feature set abstraction for 3D object detection. Proceedings of the 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Seattle, WA, USA.","DOI":"10.1109\/CVPR42600.2020.01054"},{"key":"ref_58","unstructured":"OpenPCDet Development Team (2024, October 01). Openpcdet: An Opensource Toolbox for 3d Object Detection from Point Clouds. Available online: https:\/\/github.com\/open-mmlab\/OpenPCDet."},{"key":"ref_59","doi-asserted-by":"crossref","unstructured":"Qi, C.R., Liu, W., Wu, C., Su, H., and Guibas, L.J. (2018, January 18\u201323). Frustum pointnets for 3d object detection from rgb-d data. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, Salt Lake City, UT, USA.","DOI":"10.1109\/CVPR.2018.00102"},{"key":"ref_60","doi-asserted-by":"crossref","unstructured":"Liang, M., Yang, B., Wang, S., and Urtasun, R. (2018, January 8\u201314). Deep continuous fusion for multi-sensor 3d object detection. Proceedings of the European Conference on Computer Vision (ECCV), Munich, Germany.","DOI":"10.1007\/978-3-030-01270-0_39"}],"container-title":["Information"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.mdpi.com\/2078-2489\/15\/11\/739\/pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T16:35:29Z","timestamp":1760114129000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.mdpi.com\/2078-2489\/15\/11\/739"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,19]]},"references-count":60,"journal-issue":{"issue":"11","published-online":{"date-parts":[[2024,11]]}},"alternative-id":["info15110739"],"URL":"https:\/\/doi.org\/10.3390\/info15110739","relation":{},"ISSN":["2078-2489"],"issn-type":[{"value":"2078-2489","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,19]]}}}