{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:23:04Z","timestamp":1772119384613,"version":"3.50.1"},"reference-count":73,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T00:00:00Z","timestamp":1755734400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T00:00:00Z","timestamp":1755734400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Interdisciplinary Research Support Program of HUST","award":["2024JCYJ027"],"award-info":[{"award-number":["2024JCYJ027"]}]},{"name":"Interdisciplinary Research Support Program of HUST","award":["2024JCYJ027"],"award-info":[{"award-number":["2024JCYJ027"]}]},{"name":"Interdisciplinary Research Support Program of HUST","award":["2024JCYJ027"],"award-info":[{"award-number":["2024JCYJ027"]}]},{"name":"Interdisciplinary Research Support Program of HUST","award":["2024JCYJ027"],"award-info":[{"award-number":["2024JCYJ027"]}]},{"name":"Interdisciplinary Research Support Program of HUST","award":["2024JCYJ027"],"award-info":[{"award-number":["2024JCYJ027"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s00530-025-01938-8","type":"journal-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:47:30Z","timestamp":1755776850000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Lidar-camera range-view fusion for 3D object detection in autonomous driving"],"prefix":"10.1007","volume":"31","author":[{"given":"Xuzhong","family":"Hu","sequence":"first","affiliation":[]},{"given":"Zaipeng","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Pei","family":"An","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,21]]},"reference":[{"key":"1938_CR1","doi-asserted-by":"crossref","unstructured":"Huang, T., Liu, Z., Chen, X., Bai, X.: Epnet: Enhancing point features with image semantics for 3d object detection. In: European Conference on Computer Vision (2020). https:\/\/api.semanticscholar.org\/CorpusID:220633447","DOI":"10.1007\/978-3-030-58555-6_3"},{"key":"1938_CR2","doi-asserted-by":"publisher","unstructured":"Liu, Z., Huang, T., Li, B., Chen, X., Wang, X., Bai, X.: EPNet++: Cascade Bi-Directional Fusion for Multi-Modal 3D Object Detection. In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 1\u201318 (2022) https:\/\/doi.org\/10.1109\/TPAMI.2022.3228806. Accessed 2024-07-22","DOI":"10.1109\/TPAMI.2022.3228806"},{"key":"1938_CR3","doi-asserted-by":"publisher","unstructured":"Zhang, Y., Chen, J., Huang, D.: CAT-Det: Contrastively Augmented Transformer for Multimodal 3D Object Detection. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 898\u2013907 (2022) https:\/\/doi.org\/10.1109\/CVPR52688.2022.00098. Conference Name: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) ISBN: 9781665469463 Place: New Orleans, LA, USA Publisher: IEEE. Accessed 2024-07-22","DOI":"10.1109\/CVPR52688.2022.00098"},{"key":"1938_CR4","doi-asserted-by":"publisher","unstructured":"Li, X., Shi, B., Hou, Y., Wu, X., Ma, T., Li, Y., He, L.: Homogeneous Multi-modal Feature Fusion and Interaction for 3D Object Detection (2022) https:\/\/doi.org\/10.48550\/ARXIV.2210.09615. Publisher: arXiv Version Number: 1. Accessed 2024-09-03","DOI":"10.48550\/ARXIV.2210.09615"},{"key":"1938_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y., Li, Y., Zhang, X., Sun, J., Jia, J.: Focal sparse convolutional networks for 3d object detection. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 5418\u20135427 (2022)","DOI":"10.1109\/CVPR52688.2022.00535"},{"key":"1938_CR6","doi-asserted-by":"publisher","unstructured":"Kim, Y., Park, K., Kim, M., Kum, D., Choi, J.W.: 3D Dual-Fusion: Dual-Domain Dual-Query Camera-LiDAR Fusion for 3D Object Detection (2022) https:\/\/doi.org\/10.48550\/ARXIV.2211.13529. Publisher: arXiv Version Number: 2. Accessed 2024-07-21","DOI":"10.48550\/ARXIV.2211.13529"},{"key":"1938_CR7","doi-asserted-by":"crossref","unstructured":"Yoo, J.H., Kim, Y., Kim, J.S., Choi, J.W.: 3d-cvf: Generating joint camera and lidar features using cross-view spatial feature fusion for 3d object detection. In: European Conference on Computer Vision (2020). https:\/\/api.semanticscholar.org\/CorpusID:216552886","DOI":"10.1007\/978-3-030-58583-9_43"},{"issue":"8","key":"1938_CR8","doi-asserted-by":"publisher","first-page":"8581","DOI":"10.1109\/JSEN.2023.3252178","volume":"23","author":"B Tao","year":"2023","unstructured":"Tao, B., Yan, F., Yin, Z., Nie, L., Miao, M., Jiao, Y., Lei, C.: A multimodal 3-D detector with attention from the corresponding modal. IEEE Sens. J. 23(8), 8581\u20138590 (2023). https:\/\/doi.org\/10.1109\/JSEN.2023.3252178","journal-title":"IEEE Sens. J."},{"key":"1938_CR9","doi-asserted-by":"publisher","unstructured":"Liu, Z., Tang, H., Amini, A., Yang, X., Mao, H., Rus, D.L., Han, S.: BEVFusion: Multi-Task Multi-Sensor Fusion with Unified Bird\u2019s-Eye View Representation. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), 2774\u20132781 (2023) https:\/\/doi.org\/10.1109\/ICRA48891.2023.10160968. Conference Name: 2023 IEEE International Conference on Robotics and Automation (ICRA) ISBN: 9798350323658 Place: London, United Kingdom Publisher: IEEE. Accessed 2024-08-18","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"1938_CR10","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Zhou, Y., Tuzel, O.: Mvx-net: Multimodal voxelnet for 3d object detection. In: 2019 International Conference on Robotics and Automation (ICRA), 7276\u20137282 (2019)","DOI":"10.1109\/ICRA.2019.8794195"},{"key":"1938_CR11","doi-asserted-by":"publisher","unstructured":"Vora, S., Lang, A.H., Helou, B., Beijbom, O.: PointPainting: Sequential Fusion for 3D Object Detection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4603\u20134611 (2020) https:\/\/doi.org\/10.1109\/CVPR42600.2020.00466. Conference Name: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) ISBN: 9781728171685 Place: Seattle, WA, USA Publisher: IEEE. Accessed 2024-09-12","DOI":"10.1109\/CVPR42600.2020.00466"},{"issue":"4","key":"1938_CR12","doi-asserted-by":"publisher","first-page":"9358","DOI":"10.1109\/LRA.2022.3191208","volume":"7","author":"L Zhao","year":"2022","unstructured":"Zhao, L., Wang, M., Yue, Y.: Sem-Aug: improving camera-LiDAR feature fusion with semantic augmentation for 3D vehicle detection. IEEE Robot. Autom. Lett. 7(4), 9358\u20139365 (2022). https:\/\/doi.org\/10.1109\/LRA.2022.3191208","journal-title":"IEEE Robot. Autom. Lett."},{"key":"1938_CR13","doi-asserted-by":"publisher","unstructured":"Wang, M., Zhao, L., Yue, Y.: PA3DNet: 3-D Vehicle Detection With Pseudo Shape Segmentation and Adaptive Camera-LiDAR Fusion. IEEE Trans. Industr. Inf. 19(11), 10693\u201310703 (2023). https:\/\/doi.org\/10.1109\/TII.2023.3241585. Conference Name: IEEE Transactions on Industrial Informatics. Accessed 2024-06-07","DOI":"10.1109\/TII.2023.3241585"},{"key":"1938_CR14","doi-asserted-by":"crossref","unstructured":"Zhang, W., Wang, Z., Loy, C.C.: Improving data augmentation for multi-modality 3d object detection. In: International Conference on Learning Representations 2023 Workshop on Scene Representations for Autonomous Driving (2023). https:\/\/openreview.net\/forum?id=qYU4v9zCk9","DOI":"10.2139\/ssrn.4398254"},{"key":"1938_CR15","doi-asserted-by":"publisher","unstructured":"Zhang, X., Wang, L., Zhang, G., Lan, T., Zhang, H., Zhao, L., Li, J., Zhu, L., Liu, H.: RI-Fusion: 3D Object Detection Using Enhanced Point Features With Range-Image Fusion for Autonomous Driving. In: IEEE Transactions on Instrumentation and Measurement 72, 1\u201313 (2023) https:\/\/doi.org\/10.1109\/TIM.2022.3224525. Conference Name: IEEE Transactions on Instrumentation and Measurement. Accessed 2024-06-30","DOI":"10.1109\/TIM.2022.3224525"},{"key":"1938_CR16","unstructured":"Qi, C., Yi, L., Su, H., Guibas, L.J.: Pointnet++: Deep hierarchical feature learning on point sets in a metric space. In: Neural Information Processing Systems (2017). https:\/\/api.semanticscholar.org\/CorpusID:1745976"},{"key":"1938_CR17","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., Li, H.: Pointrcnn: 3d object proposal generation and detection from point cloud. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 770\u2013779 (2018)","DOI":"10.1109\/CVPR.2019.00086"},{"key":"1938_CR18","doi-asserted-by":"crossref","unstructured":"Yang, Z., Sun, Y., Liu, S., Jia, J.: 3dssd: Point-based 3d single stage object detector. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 11037\u201311045 (2020)","DOI":"10.1109\/CVPR42600.2020.01105"},{"key":"1938_CR19","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Hu, Q., Xu, G., Ma, Y., Wan, J.-H., Guo, Y.: Not all points are equal: Learning highly efficient point-based detectors for 3d lidar point clouds. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 18931\u201318940 (2022)","DOI":"10.1109\/CVPR52688.2022.01838"},{"key":"1938_CR20","doi-asserted-by":"crossref","unstructured":"Chen, C., Chen, Z., Zhang, J., Tao, D.: Sasa: Semantics-augmented set abstraction for point-based 3d object detection. In: AAAI Conference on Artificial Intelligence (2022). https:\/\/api.semanticscholar.org\/CorpusID:245769803","DOI":"10.1609\/aaai.v36i1.19897"},{"key":"1938_CR21","doi-asserted-by":"crossref","unstructured":"Yan, Y., Mao, Y., Li, B.: Second: Sparsely embedded convolutional detection. Sensors (Basel, Switzerland) 18 (2018)","DOI":"10.3390\/s18103337"},{"key":"1938_CR22","doi-asserted-by":"publisher","unstructured":"Zheng, W., Tang, W., Chen, S., Jiang, L., Fu, C.-W.: CIA-SSD: Confident IoU-Aware Single-Stage Object Detector From Point Cloud. In: Proceedings of the AAAI Conference on Artificial Intelligence 35(4), 3555\u20133562 (2021). https:\/\/doi.org\/10.1609\/aaai.v35i4.16470","DOI":"10.1609\/aaai.v35i4.16470"},{"key":"1938_CR23","doi-asserted-by":"crossref","unstructured":"Deng, J., Shi, S., Li, P.-C., Zhou, W.-g., Zhang, Y., Li, H.: Voxel r-cnn: Towards high performance voxel-based 3d object detection. ArXiv arXiv:2012.15712 (2020)","DOI":"10.1609\/aaai.v35i2.16207"},{"key":"1938_CR24","doi-asserted-by":"crossref","unstructured":"Yin, T., Zhou, X., Kr\u00e4henb\u00fchl, P.: Center-based 3d object detection and tracking. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 11779\u201311788 (2020)","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"1938_CR25","doi-asserted-by":"crossref","unstructured":"Chen, Y., Liu, J., Zhang, X., Qi, X., Jia, J.: Voxelnext: Fully sparse voxelnet for 3d object detection and tracking. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 21674\u201321683 (2023)","DOI":"10.1109\/CVPR52729.2023.02076"},{"key":"1938_CR26","doi-asserted-by":"crossref","unstructured":"Lee, J.-K., Lee, J.-H., Lee, J., Kwon, S., Jung, H.: Re-voxeldet: Rethinking neck and head architectures for high-performance voxel-based 3d detection. In: 2024 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), 7488\u20137497 (2024)","DOI":"10.1109\/WACV57701.2024.00733"},{"key":"1938_CR27","doi-asserted-by":"crossref","unstructured":"Lang, A.H., Vora, S., Caesar, H., Zhou, L., Yang, J., Beijbom, O.: Pointpillars: Fast encoders for object detection from point clouds. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 12689\u201312697 (2018)","DOI":"10.1109\/CVPR.2019.01298"},{"key":"1938_CR28","doi-asserted-by":"crossref","unstructured":"Yang, B., Luo, W., Urtasun, R.: Pixor: Real-time 3d object detection from point clouds. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 7652\u20137660 (2018)","DOI":"10.1109\/CVPR.2018.00798"},{"key":"1938_CR29","doi-asserted-by":"crossref","unstructured":"Shi, G.-H., Li, R., Ma, C.: Pillarnet: Real-time and high-performance pillar-based 3d object detection. In: European Conference on Computer Vision (2022). https:\/\/api.semanticscholar.org\/CorpusID:248811748","DOI":"10.1007\/978-3-031-20080-9_3"},{"key":"1938_CR30","unstructured":"Zhou, S., Tian, Z., Chu, X., Zhang, X., Zhang, B., Lu, X., Feng, C., Jie, Z., Chiang, P., Ma, L.: Fastpillars: A deployment-friendly pillar-based 3d detector. ArXiv arXiv:2302.02367 (2023)"},{"key":"1938_CR31","first-page":"1","volume":"73","author":"Z Cao","year":"2024","unstructured":"Cao, Z., Wang, T., Sun, P., Cao, F., Shao, S., Wang, S.: Scorepillar: a real-time small object detection method based on pillar scoring of lidar measurement. IEEE Trans. Instrum. Meas. 73, 1\u201313 (2024)","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"1938_CR32","doi-asserted-by":"publisher","first-page":"15824","DOI":"10.1109\/TITS.2022.3145588","volume":"23","author":"K Peng","year":"2021","unstructured":"Peng, K., Fei, J., Yang, K., Roitberg, A., Zhang, J., Bieder, F., Heidenreich, P., Stiller, C., Stiefelhagen, R.: Mass: multi-attentional semantic segmentation of lidar data for dense top-view understanding. IEEE Trans. Intell. Transp. Syst. 23, 15824\u201315840 (2021)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"1938_CR33","doi-asserted-by":"crossref","unstructured":"Fei, J., Peng, K., Heidenreich, P., Bieder, F., Stiller, C.: Pillarsegnet: Pillar-based semantic grid map estimation using sparse lidar data. In: 2021 IEEE Intelligent Vehicles Symposium (IV), 838\u2013844 (2021)","DOI":"10.1109\/IV48863.2021.9575694"},{"key":"1938_CR34","doi-asserted-by":"crossref","unstructured":"Park, K., Kim, Y., Koh, J., Park, B., Choi, J.W.: Fine-grained pillar feature encoding via spatio-temporal virtual grid for 3d object detection. In: 2024 IEEE International Conference on Robotics and Automation (ICRA), 4259\u20134265 (2024)","DOI":"10.1109\/ICRA57147.2024.10611414"},{"key":"1938_CR35","doi-asserted-by":"publisher","first-page":"2399","DOI":"10.1109\/TIV.2023.3323377","volume":"9","author":"C Wang","year":"2024","unstructured":"Wang, C., Liu, Z.: Cafi-pillars: infusing geometry priors for pillar-based 3d detectors through centroid-aware feature interaction. IEEE Trans. Intell. Vehicles 9, 2399\u20132408 (2024)","journal-title":"IEEE Trans. Intell. Vehicles"},{"key":"1938_CR36","doi-asserted-by":"crossref","unstructured":"Chen, Y., Liu, J., Zhang, X., Qi, X., Jia, J.: Largekernel3d: Scaling up kernels in 3d sparse cnns. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 13488\u201313498 (2022)","DOI":"10.1109\/CVPR52729.2023.01296"},{"key":"1938_CR37","doi-asserted-by":"crossref","unstructured":"Lu, T., Ding, X., Liu, H., Wu, G., Wang, L.: Link: Linear kernel for lidar-based 3d perception. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 1105\u20131115 (2023)","DOI":"10.1109\/CVPR52729.2023.00113"},{"key":"1938_CR38","unstructured":"Cui, L., Li, X., Meng, M.: Large receptive field strategy and important feature extraction strategy in 3d object detection. ArXiv arXiv:2401.11913 (2024)"},{"key":"1938_CR39","doi-asserted-by":"crossref","unstructured":"Mao, J., Xue, Y., Niu, M., Bai, H., Feng, J., Liang, X., Xu, H., Xu, C.: Voxel transformer for 3d object detection. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), 3144\u20133153 (2021)","DOI":"10.1109\/ICCV48922.2021.00315"},{"key":"1938_CR40","unstructured":"Dong, S., Ding, L., Wang, H., Xu, T., Xu, X., Wang, J., Bian, Z., Wang, Y., Li, J.: Mssvt: Mixed-scale sparse voxel transformer for 3d object detection on point clouds. In: Neural Information Processing Systems (2022). https:\/\/api.semanticscholar.org\/CorpusID:258509432"},{"key":"1938_CR41","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3618331","volume":"42","author":"P-S Wang","year":"2023","unstructured":"Wang, P.-S.: Octformer: octree-based transformers for 3d point clouds. ACM Trans. Grap. (TOG) 42, 1\u201311 (2023)","journal-title":"ACM Trans. Grap. (TOG)"},{"key":"1938_CR42","doi-asserted-by":"crossref","unstructured":"Wang, H., Shi, C., Shi, S., Lei, M., Wang, S., He, D., Schiele, B., Wang, L.: Dsvt: Dynamic sparse voxel transformer with rotated sets. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 13520\u201313529 (2023)","DOI":"10.1109\/CVPR52729.2023.01299"},{"key":"1938_CR43","doi-asserted-by":"publisher","first-page":"5844","DOI":"10.1109\/TCSVT.2023.3260115","volume":"33","author":"X Tian","year":"2023","unstructured":"Tian, X., Yang, M., Yu, Q., Yong, J., Xu, D.: Medoidsformer: a strong 3d object detection backbone by exploiting interaction with adjacent medoid tokens. IEEE Trans. Circuits Syst. Video Technol. 33, 5844\u20135854 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1938_CR44","doi-asserted-by":"publisher","unstructured":"Fan, L., Xiong, X., Wang, F., Wang, N., Zhang, Z.: RangeDet:In Defense of Range View for LiDAR-based 3D Object Detection. arXiv. arXiv:2103.10039 [cs] (2021). https:\/\/doi.org\/10.48550\/arXiv.2103.10039. Accessed 2024-08-18","DOI":"10.48550\/arXiv.2103.10039"},{"key":"1938_CR45","doi-asserted-by":"publisher","unstructured":"Liang, Z., Zhang, M., Zhang, Z., Zhao, X., Pu, S.: RangeRCNN: Towards Fast and Accurate 3D Object Detection with Range Image Representation. arXiv. arXiv:2009.00206 [cs] (2021). https:\/\/doi.org\/10.48550\/arXiv.2009.00206. Accessed 2024-08-18","DOI":"10.48550\/arXiv.2009.00206"},{"key":"1938_CR46","doi-asserted-by":"crossref","unstructured":"Liang, Z., Zhang, Z., Zhang, M., Zhao, X., Pu, S.: Rangeioudet: Range image based real-time 3d object detector optimized by intersection over union. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 7136\u20137145 (2021)","DOI":"10.1109\/CVPR46437.2021.00706"},{"key":"1938_CR47","doi-asserted-by":"publisher","unstructured":"Wilson, B., Mitchell, N.A., Pontes, J.K., Hays, J.: What Matters in Range View 3D Object Detection. arXiv:2407.16789 [cs] (2024). https:\/\/doi.org\/10.48550\/arXiv.2407.16789. Accessed 2024-07-28","DOI":"10.48550\/arXiv.2407.16789"},{"key":"1938_CR48","doi-asserted-by":"publisher","first-page":"1508","DOI":"10.1109\/TCSVT.2023.3296583","volume":"34","author":"C Yu","year":"2024","unstructured":"Yu, C., Peng, B., Huang, Q., Lei, J.: Pipc-3ddet: harnessing perspective information and proposal correlation for 3d point cloud object detection. IEEE Trans. Circuits Syst. Video Technol. 34, 1508\u20131518 (2024)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1938_CR49","doi-asserted-by":"crossref","unstructured":"Qiao, R., Ji, H., Zhu, Z., Zhang, W.: Local-to-global semantic learning for multi-view 3d object detection from point cloud. In: IEEE Transactions on Circuits and Systems for Video Technology (2024)","DOI":"10.1109\/TCSVT.2024.3396870"},{"key":"1938_CR50","doi-asserted-by":"crossref","unstructured":"Wang, N., Yang, A., Cui, Z., Ding, Y., Xue, Y., Su, Y.: Capsule attention network for hyperspectral image classification. Remote Sensing (2024)","DOI":"10.3390\/rs16214001"},{"key":"1938_CR51","doi-asserted-by":"crossref","unstructured":"Xue, Y., Zhong, B., Jin, G., Shen, T., Tan, L., Li, N., Zheng, Y.: Avltrack: Dynamic sparse learning for aerial vision-language tracking. In: IEEE Transactions on Circuits and Systems for Video Technology (2025)","DOI":"10.1109\/TCSVT.2025.3549953"},{"key":"1938_CR52","first-page":"1","volume":"61","author":"Y Xue","year":"2023","unstructured":"Xue, Y., Jin, G., Shen, T., Tan, L., Wang, N., Gao, J., Wang, L.: Smalltrack: wavelet pooling and graph enhanced classification for uav small object tracking. IEEE Trans. Geosci. Remote Sens. 61, 1\u201315 (2023)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"1938_CR53","doi-asserted-by":"publisher","first-page":"10845","DOI":"10.1109\/TCSVT.2024.3411301","volume":"34","author":"Y Xue","year":"2024","unstructured":"Xue, Y., Jin, G., Shen, T., Tan, L., Wang, N., Gao, J., Wang, L.: Consistent representation mining for multi-drone single object tracking. IEEE Trans. Circuits Syst. Video Technol. 34, 10845\u201310859 (2024)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1938_CR54","doi-asserted-by":"publisher","first-page":"3917","DOI":"10.1109\/TIV.2023.3348099","volume":"9","author":"Y Yang","year":"2024","unstructured":"Yang, Y., Yin, H., Chong, A., Wan, J., Liu, Q.-Y.: Sacinet: semantic-aware cross-modal interaction network for real-time 3d object detection. IEEE Trans. Intell. Vehicles 9, 3917\u20133927 (2024)","journal-title":"IEEE Trans. Intell. Vehicles"},{"key":"1938_CR55","doi-asserted-by":"publisher","unstructured":"Li, Y., Yu, A.W., Meng, T., Caine, B., Ngiam, J., Peng, D., Shen, J., Lu, Y., Zhou, D., Le, Q.V., Yuille, A., Tan, M.: DeepFusion: Lidar-Camera Deep Fusion for Multi-Modal 3D Object Detection. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 17161\u201317170 (2022) https:\/\/doi.org\/10.1109\/CVPR52688.2022.01667. Conference Name: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) ISBN: 9781665469463 Place: New Orleans, LA, USA Publisher: IEEE. Accessed 2024-07-22","DOI":"10.1109\/CVPR52688.2022.01667"},{"key":"1938_CR56","unstructured":"Kim, Y., Park, K., Kim, M., Kum, D., Choi, J.W.: 3d dual-fusion: Dual-domain dual-query camera-lidar fusion for 3d object detection. ArXiv arXiv:2211.13529 (2022)"},{"key":"1938_CR57","unstructured":"Li, Y., Chen, Y., Qi, X., Li, Z., Sun, J., Jia, J.: Unifying voxel-based representation with transformer for 3d object detection. ArXiv arXiv:2206.00630 (2022)"},{"key":"1938_CR58","doi-asserted-by":"crossref","unstructured":"Cortinhal, T., Tzelepis, G., Aksoy, E.E.: Salsanext: Fast, uncertainty-aware semantic segmentation of lidar point clouds. In: International Symposium on Visual Computing (2020). https:\/\/api.semanticscholar.org\/CorpusID:220424521","DOI":"10.1007\/978-3-030-64559-5_16"},{"issue":"2","key":"1938_CR59","doi-asserted-by":"publisher","first-page":"1391","DOI":"10.1109\/JSEN.2021.3127626","volume":"22","author":"Z Zhang","year":"2022","unstructured":"Zhang, Z., Liang, Z., Zhang, M., Zhao, X., Li, H., Yang, M., Tan, W., Pu, S.: RangeLVDet: boosting 3D object detection in LIDAR with range image and RGB image. IEEE Sens. J. 22(2), 1391\u20131403 (2022). https:\/\/doi.org\/10.1109\/JSEN.2021.3127626","journal-title":"IEEE Sens. J."},{"key":"1938_CR60","doi-asserted-by":"crossref","unstructured":"Hassani, A., Walton, S., Li, J., Li, S., Shi, H.: Neighborhood attention transformer. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 6185\u20136194 (2022)","DOI":"10.1109\/CVPR52729.2023.00599"},{"key":"1938_CR61","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108468","volume":"124","author":"H Zhou","year":"2021","unstructured":"Zhou, H., Qi, L., Huang, H., Yang, X., Wan, Z., Wen, X.: Canet: co-attention network for rgb-d semantic segmentation. Pattern Recognit. 124, 108468 (2021)","journal-title":"Pattern Recognit."},{"key":"1938_CR62","first-page":"14679","volume":"24","author":"H Liu","year":"2022","unstructured":"Liu, H., Zhang, J., Yang, K., Hu, X., Stiefelhagen, R.: Cmx: cross-modal fusion for rgb-x semantic segmentation with transformers. IEEE Trans. Intell. Transp. Syst. 24, 14679\u201314694 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"1938_CR63","doi-asserted-by":"crossref","unstructured":"Du, Z., Hu, Z., Zhao, G., Jin, Y., Ma, H.: Cross-layer feature pyramid transformer for small object detection in aerial images. ArXiv arXiv:2407.19696 (2024)","DOI":"10.1109\/TGRS.2025.3572706"},{"key":"1938_CR64","doi-asserted-by":"publisher","unstructured":"Yang, H., Wang, W., Chen, M., Lin, B., He, T., Chen, H., He, X., Ouyang, W.: PVT-SSD: Single-Stage 3D Object Detector with Point-Voxel Transformer. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 13476\u201313487 (2023) https:\/\/doi.org\/10.1109\/CVPR52729.2023.01295. Conference Name: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) ISBN: 9798350301298 Place: Vancouver, BC, Canada Publisher: IEEE. Accessed 2024-06-09","DOI":"10.1109\/CVPR52729.2023.01295"},{"key":"1938_CR65","doi-asserted-by":"crossref","unstructured":"He, C., Zeng, H., Huang, J., Hua, X., Zhang, L.: Structure aware single-stage 3d object detection from point cloud. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 11870\u201311879 (2020)","DOI":"10.1109\/CVPR42600.2020.01189"},{"key":"1938_CR66","doi-asserted-by":"crossref","unstructured":"Shi, S., Guo, C., Jiang, L., Wang, Z., Shi, J., Wang, X., Li, H.: Pv-rcnn: Point-voxel feature set abstraction for 3d object detection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 10526\u201310535 (2019)","DOI":"10.1109\/CVPR42600.2020.01054"},{"issue":"12","key":"1938_CR67","doi-asserted-by":"publisher","first-page":"4722","DOI":"10.1109\/TCSVT.2021.3100848","volume":"31","author":"J Deng","year":"2021","unstructured":"Deng, J., Zhou, W., Zhang, Y., Li, H.: From multi-view to hollow-3D: hallucinated hollow-3D R-CNN for 3D object detection. IEEE Trans. Circuits Syst. Video Technol. 31(12), 4722\u20134734 (2021). https:\/\/doi.org\/10.1109\/TCSVT.2021.3100848","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1938_CR68","doi-asserted-by":"crossref","unstructured":"Liang, M., Yang, B., Wang, S., Urtasun, R.: Deep continuous fusion for multi-sensor 3d object detection. In: European Conference on Computer Vision (2018). https:\/\/api.semanticscholar.org\/CorpusID:52211898","DOI":"10.1007\/978-3-030-01270-0_39"},{"key":"1938_CR69","doi-asserted-by":"publisher","unstructured":"Xie, L., Xiang, C., Yu, Z., Xu, G., Yang, Z., Cai, D., He, X.: PI-RCNN: An Efficient Multi-Sensor 3D Object Detector with Point-Based Attentive Cont-Conv Fusion Module. Proceedings of the AAAI Conference on Artificial Intelligence 34(07), 12460\u201312467 (2020). https:\/\/doi.org\/10.1609\/aaai.v34i07.6933","DOI":"10.1609\/aaai.v34i07.6933"},{"key":"1938_CR70","doi-asserted-by":"publisher","first-page":"5598","DOI":"10.1109\/TITS.2023.3347078","volume":"25","author":"G Xie","year":"2024","unstructured":"Xie, G., Chen, Z., Gao, M., Hu, M., Qin, X.: Ppf-det: point-pixel fusion for multi-modal 3d object detection. IEEE Trans. Intell. Transp. Syst. 25, 5598\u20135611 (2024)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"1938_CR71","doi-asserted-by":"publisher","first-page":"9397","DOI":"10.1109\/TITS.2024.3387398","volume":"25","author":"M Liu","year":"2024","unstructured":"Liu, M., Chen, Y., Xie, J., Zhu, Y., Zhang, Y., Yao, L., Bing, Z., Zhuang, G., Huang, K., Zhou, J.T.: Menet: multi-modal mapping enhancement network for 3d object detection in autonomous driving. IEEE Trans. Intell. Transp. Syst. 25, 9397\u20139410 (2024)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"1938_CR72","doi-asserted-by":"crossref","unstructured":"Chen, B., Shen, H., Zhao, Z., Yu, L., Zhao, Y.: Lidar-camera cross fusion network towards 3d object detection in self-driving. IEEE Sensors Journal (2024)","DOI":"10.1109\/JSEN.2024.3425390"},{"key":"1938_CR73","doi-asserted-by":"publisher","first-page":"707","DOI":"10.1109\/TMM.2023.3270638","volume":"27","author":"Z Liu","year":"2025","unstructured":"Liu, Z., Cheng, J., Fan, J., Lin, S., Wang, Y., Zhao, X.: Multi-modal fusion based on depth adaptive mechanism for 3d object detection. IEEE Trans. Multimed. 27, 707\u2013717 (2025)","journal-title":"IEEE Trans. Multimed."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01938-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01938-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01938-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T10:25:28Z","timestamp":1761387928000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01938-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,21]]},"references-count":73,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["1938"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01938-8","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-5654887\/v1","asserted-by":"object"}]},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,21]]},"assertion":[{"value":"16 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 July 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"363"}}