{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T01:57:19Z","timestamp":1768787839586,"version":"3.49.0"},"reference-count":54,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"6","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2025,6,1]]},"DOI":"10.1587\/transinf.2024edp7158","type":"journal-article","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T22:34:26Z","timestamp":1732660466000},"page":"593-603","source":"Crossref","is-referenced-by-count":1,"title":["FusionReg: LiDAR-Camera Fusion Regression Enhancement for 3D Object Detection"],"prefix":"10.1587","volume":"E108.D","author":[{"given":"Rongchun","family":"XIAO","sequence":"first","affiliation":[{"name":"Beijing Key Laboratory of Information Service Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuansheng","family":"LIU","sequence":"additional","affiliation":[{"name":"School of Robotics, Beijing Union University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"ZHANG","sequence":"additional","affiliation":[{"name":"School of Robotics, Beijing Union University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanliang","family":"HUANG","sequence":"additional","affiliation":[{"name":"Beijing Key Laboratory of Information Service Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xi","family":"HAN","sequence":"additional","affiliation":[{"name":"School of Robotics, Beijing Union University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"publisher","unstructured":"[1] H. Li, C. Sima, J. Dai, W. Wang, L. Lu, H. Wang, J. Zeng, Z. Li, J. Yang, H. Deng, H. Tian, E. Xie, J. Xie, L. Chen, T. Li, Y. Li, Y. Gao, X. Jia, S. Li, J. Shi, D. Lin, and Y. Qiao, \u201cDelving into the devils of bird\u2019s-eye-view perception: A review, evaluation and recipe,\u201d IEEE Trans. Pattern Anal. Mach. Intell., vol.46, no.4, pp.2151-2170, 2023. DOI: 10.1109\/TPAMI.2023.3333838 10.1109\/TPAMI.2023.3333838","DOI":"10.1109\/TPAMI.2023.3333838"},{"key":"2","doi-asserted-by":"publisher","unstructured":"[2] R. Qian, X. Lai, and X. Li, \u201c3D object detection for autonomous driving: A survey,\u201d Pattern Recognition, vol.130, 108796, 2022. DOI: 10.1016\/j.patcog.2022.108796 10.1016\/j.patcog.2022.108796","DOI":"10.1016\/j.patcog.2022.108796"},{"key":"3","unstructured":"[3] S. Xie, Z. Li, Z. Wang, and C. Xie, \u201cOn the adversarial robustness of camera-based 3D object detection ,\u201d arXiv preprint arXiv:2301.10766, 2023. 10.48550\/arXiv.2301.10766"},{"key":"4","doi-asserted-by":"publisher","unstructured":"[4] K. Yoshioka. \u201cA tutorial and review of automobile direct ToF LiDAR SoCs: Evolution of next-generation LiDARs,\u201d IEICE Trans. Electron., vol.E105-C, no.10, pp.534-543, Oct. 2022. DOI: 10.1587\/transele.2021CTI0002 10.1587\/transele.2021CTI0002","DOI":"10.1587\/transele.2021CTI0002"},{"key":"5","doi-asserted-by":"publisher","unstructured":"[5] J. Mao, S. Shi, X. Wang, and H. Li, \u201c3D object detection for autonomous driving: A comprehensive survey,\u201d International Journal of Computer Vision, vol.131, pp.1909-1963, 2023. DOI: 10.1007\/s11263-023-01790-1 10.1007\/s11263-023-01790-1","DOI":"10.1007\/s11263-023-01790-1"},{"key":"6","doi-asserted-by":"publisher","unstructured":"[6] J. Fan, L. Fan, Q. Ni, J. Wang, Y. Liu, R. Li, Y. Wang, and S. Wang, \u201cPerception and planning of intelligent vehicles based on BEV in extreme off-road scenarios,\u201d IEEE Trans. Intell. Veh., vol.9, no.4, pp.4568-4572, 2024. DOI: 10.1109\/TIV.2024.3392753 10.1109\/TIV.2024.3392753","DOI":"10.1109\/TIV.2024.3392753"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] K. Feng, C. Li, D. Ren, Y. Yuan, and G. Wang, \u201cOn the road to portability: Compressing end-to-end motion planner for autonomous driving,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.15099-15108, 2024. DOI: 10.1109\/CVPR52733.2024.01430 10.1109\/CVPR52733.2024.01430","DOI":"10.1109\/CVPR52733.2024.01430"},{"key":"8","doi-asserted-by":"crossref","unstructured":"[8] Y. Li, W. Yang, J. Tao, Q. Wang, Z. Cui, and X. Qin, \u201cAVM-SLAM: Semantic visual SLAM with multi-sensor fusion in a bird\u2019s eye view for automated valet parking,\u201d arXiv preprint arXiv:2309.08180, 2023. 10.48550\/arXiv.2309.08180","DOI":"10.1109\/IROS58592.2024.10802668"},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] N. Khedekar, M. Kulkarni, and K. Alexis, \u201cMIMOSA: A multi-modal SLAM framework for resilient autonomy against sensor degradation,\u201d IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp.7153-7159, 2022. DOI: 10.1109\/iros47612.2022.9981108 10.1109\/iros47612.2022.9981108","DOI":"10.1109\/IROS47612.2022.9981108"},{"key":"10","doi-asserted-by":"publisher","unstructured":"[10] P. Karle, M. Geisslinger, J. Betz, and M. Lienkamp, \u201cScenario understanding and motion prediction for autonomous vehicles\u2014Review and comparison,\u201d IEEE Trans. Intell. Transp. Syst., vol.23, no.10, pp.16962-16982, 2022. DOI: 10.1109\/TITS.2022.3156011 10.1109\/TITS.2022.3156011","DOI":"10.1109\/TITS.2022.3156011"},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] X. Bai, Z. Hu, X. Zhu, Q. Huang, Y. Chen, H. Fu, and C.-L. Tai, \u201cTransfusion: Robust LiDAR-camera fusion for 3D object detection with transformers,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.1080-1089, 2022. DOI: 10.1109\/CVPR52688.2022.00116 10.1109\/CVPR52688.2022.00116","DOI":"10.1109\/CVPR52688.2022.00116"},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] Y. Jiao, Z. Jie, S. Chen, J. Chen, L. Ma, and Y. -G. Jiang, \u201cMSMDFusion: Fusing LiDAR and camera at multiple scales with multi-depth seeds for 3d object detection,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.21643-21652, 2023. DOI: 10.1109\/CVPR52729.2023.02073 10.1109\/CVPR52729.2023.02073","DOI":"10.1109\/CVPR52729.2023.02073"},{"key":"13","doi-asserted-by":"publisher","unstructured":"[13] C. Yan and E. Salman, \u201cMono3D: Open source cell library for monolithic 3-D integrated circuits,\u201d IEEE Trans. Circuits Syst. I: Reg. Papers, vol.65, no.3, pp.1075-1085, 2017. DOI: 10.1109\/TCSI.2017.2768330 10.1109\/TCSI.2017.2768330","DOI":"10.1109\/TCSI.2017.2768330"},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] X. Chen, H. Ma, J. Wan, B. Li, and T. Xia, \u201cMulti-view 3D object detection network for autonomous driving,\u201d IEEE Conference on Computer Vision and Pattern Recognition, pp.6526-6534, 2017. DOI: 10.1109\/CVPR.2017.691 10.1109\/CVPR.2017.691","DOI":"10.1109\/CVPR.2017.691"},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] J. Ku, A.D. Pon, and S.L. Waslander, \u201cMonocular 3D object detection leveraging accurate proposals and shape reconstruction,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.11859-11868, 2019. DOI: 10.1109\/CVPR.2019.01214 10.1109\/CVPR.2019.01214","DOI":"10.1109\/CVPR.2019.01214"},{"key":"16","unstructured":"[16] A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, S. Gelly, J. Uszkoreit, and N. Houlsby, \u201cAn image is worth 16x16 words: Transformers for image recognition at scale,\u201d arXiv preprint arXiv: 2010.11929, 2020. 10.48550\/arXiv.2010.11929"},{"key":"17","doi-asserted-by":"crossref","unstructured":"[17] N. Carion, F. Massa, G. Synnaeve, N. Usunier, A. Kirillov, and S. Zagoruyko, \u201cEnd-to-end object detection with transformers,\u201d European Conference on Computer Vision, pp.213-229, Springer International Publishing, Cham, 2020. DOI: 10.1007\/978-3-030-58452-8_13 10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"18","unstructured":"[18] Y. Wang, V.C. Guizilini, T. Zhang, Y. Wang, H. Zhao, and J. Solomon, \u201cDETR3D: 3D object detection from multi-view images via 3D-to-2D queries,\u201d Proc. 5th Conference on Robot Learning, PMLR, vol.164, pp.180-191, 2022."},{"key":"19","doi-asserted-by":"crossref","unstructured":"[19] Y. Liu, T. Wang, X. Zhang, and J. Sun, \u201cPETR: Position embedding transformation for multi-view 3D object detection,\u201d European Conference on Computer Vision, pp.531-548, Springer Nature Switzerland, Cham, 2022. DOI: 10.1007\/978-3-031-19812-0_31 10.1007\/978-3-031-19812-0_31","DOI":"10.1007\/978-3-031-19812-0_31"},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] Y. Liu, J. Yan, F. Jia, S. Li, A. Gao, T. Wang, and X. Zhang, \u201cPETRv2: A unified framework for 3D perception from multi-camera images,\u201d IEEE\/CVF International Conference on Computer Vision, pp.3239-3249, 2023. DOI: 10.1109\/ICCV51070.2023.00302 10.1109\/ICCV51070.2023.00302","DOI":"10.1109\/ICCV51070.2023.00302"},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] J. Philion and S. Fidler, \u201cLift, splat, shoot: Encoding images from arbitrary camera rigs by implicitly unprojecting to 3D,\u201d European Conference on Computer Vision, Springer International Publishing, pp.194-210, Cham, 2020. DOI: 10.1007\/978-3-030-58568-6_12 10.1007\/978-3-030-58568-6_12","DOI":"10.1007\/978-3-030-58568-6_12"},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] C. Reading, A. Harakeh, J. Chae, and S. L. Waslander, \u201cCategorical depth distribution network for monocular 3D object detection,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.8551-8560, 2021. DOI: 10.1109\/CVPR46437.2021.00845 10.1109\/CVPR46437.2021.00845","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"23","doi-asserted-by":"crossref","unstructured":"[23] C. Yang, Y. Chen, H. Tian, C. Tao, X. Zhu, Z. Zhang, G. Huang, H. Li, Y. Qiao, L. Lu, J. Zhou, and J. Dai, \u201cBEVFormer v2: Adapting modern image backbones to bird\u2019s-eye-view recognition via perspective supervision,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.17830-17839, 2023. DOI: 10.1109\/CVPR52729.2023.01710 10.1109\/CVPR52729.2023.01710","DOI":"10.1109\/CVPR52729.2023.01710"},{"key":"24","unstructured":"[24] J. Huang, G. Huang, Z. Zhu, and D. Du, \u201cBEVDet: High-performance multi-camera 3D object detection in bird-eye-view,\u201d arXiv preprint arXiv:2112.11790, 2021. 10.48550\/arXiv.2112.11790"},{"key":"25","doi-asserted-by":"publisher","unstructured":"[25] Y. Li, Z. Ge, G. Yu, J. Yang, Z. Wang, Y. Shi, J. Sun, and Z. Li, \u201cBEVDepth: Acquisition of reliable depth for multi-view 3D object detection,\u201d AAAI Conference on Artificial Intelligence, vol.37, no.2, pp.1477-1485, 2023. DOI: 10.1609\/aaai.v37i2.25233 10.1609\/aaai.v37i2.25233","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"26","doi-asserted-by":"crossref","unstructured":"[26] Y. Zhou and O. Tuzel, \u201cVoxelNet: End-to-end learning for point cloud based 3D object detection,\u201d IEEE Conference on Computer Vision and Pattern Recognition, pp.4490-4499, 2018. DOI: 10.1109\/CVPR.2018.00472 10.1109\/CVPR.2018.00472","DOI":"10.1109\/CVPR.2018.00472"},{"key":"27","doi-asserted-by":"publisher","unstructured":"[27] Y. Yan, Y. Mao, and B. Li, \u201cSECOND: Sparsely embedded convolutional detection,\u201d Sensors, vol.18, no.10, 3337, 2018. DOI: 10.3390\/s18103337 10.3390\/s18103337","DOI":"10.3390\/s18103337"},{"key":"28","doi-asserted-by":"crossref","unstructured":"[28] A.H. Lang, S. Vora, H. Caesar, L. Zhou, J. Yang, and O. Beijbom, \u201cPointPillars: Fast encoders for object detection from point clouds,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.12689-12697, 2019. DOI: 10.1109\/CVPR.2019.01298 10.1109\/CVPR.2019.01298","DOI":"10.1109\/CVPR.2019.01298"},{"key":"29","doi-asserted-by":"crossref","unstructured":"[29] T. Yin, X. Zhou, and P. Kr\u00e4henbuhl, \u201cCenter-based 3D object detection and tracking,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.11779-11788, 2021. DOI: 10.1109\/CVPR46437.2021.01161 10.1109\/CVPR46437.2021.01161","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] S. Shi, X. Wang, and H. Li, \u201cPointRCNN: 3D object proposal generation and detection from point cloud,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.770-779, 2019. DOI: 10.1109\/CVPR.2019.00086 10.1109\/CVPR.2019.00086","DOI":"10.1109\/CVPR.2019.00086"},{"key":"31","doi-asserted-by":"crossref","unstructured":"[31] H. Wang, C. Shi, S. Shi, M. Lei, S. Wang, D. He, B. Schiele, and L. Wang, \u201cDSVT: Dynamic sparse voxel transformer with rotated sets,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.13520-13529, 2023. DOI: 10.1109\/CVPR52729.2023.01299 10.1109\/CVPR52729.2023.01299","DOI":"10.1109\/CVPR52729.2023.01299"},{"key":"32","unstructured":"[32] G. Zhang, L. Fan, C. He, Z. Lei, Z. Zhang, and L. Zhang, \u201cVoxel Mamba: Group-free state space models for point cloud based 3D object detection,\u201d arXiv preprint arXiv:2406.10700, 2024. 10.48550\/arXiv.2406.10700"},{"key":"33","unstructured":"[33] Y. Li, Y. Chen, X. Qi, Z. Li, J. Sun, and J. Jia, \u201cUnifying voxel-based representation with transformer for 3D object detection,\u201d Neural Information Processing Systems, vol.36, pp.18442-18455, 2022."},{"key":"34","doi-asserted-by":"crossref","unstructured":"[34] Z. Liu, H. Tang, A. Amini, X. Yang, H. Mao, D.L. Rus, and S. Han, \u201cBEVFusion: Multi-task multi-sensor fusion with unified bird\u2019s-eye view representation,\u201d IEEE International Conference on Robotics and Automation, pp.2774-2781, 2023. DOI: 10.1109\/ICRA48891.2023.10160968 10.1109\/ICRA48891.2023.10160968","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"35","unstructured":"[35] T. Liang, H. Xie, K. Yu, Z. Xia, Z. Lin, Y. Wang, T. Tang, B. Wang, and Z. Tang, \u201cBEVFusion: A simple and robust LiDAR-camera fusion framework,\u201d Neural Information Processing Systems, vol.36, pp.10421-10434, 2022."},{"key":"36","doi-asserted-by":"crossref","unstructured":"[36] J. Fu, C. Gao, Z. Wang, L. Yang, X. Wang, B. Mu, and S. Liu, \u201cEliminating cross-modal conflicts in BEV space for LiDAR-camera 3D object detection,\u201d arXiv preprint arXiv:2403.07372, 2024. 10.48550\/arXiv.2403.07372","DOI":"10.1109\/ICRA57147.2024.10610230"},{"key":"37","unstructured":"[37] H. Cai, Z. Zhang, Z. Zhou, Z. Li, W. Ding, and J. Zhao, \u201cBEVFusion4D: Learning LiDAR-camera fusion under bird\u2019s-eye-view via cross-modality guidance and temporal aggregation,\u201d arXiv preprint arXiv:2303.17099, 2023. 10.48550\/arXiv.2303.17099"},{"key":"38","doi-asserted-by":"crossref","unstructured":"[38] T.-Y. Lin, P. Doll\u00e1r, R. Girshick, K. He, B. Hariharan, and S. Belongie, \u201cFeature pyramid networks for object detection,\u201d IEEE Conference on Computer Vision and Pattern Recognition, pp.936-944, 2017. DOI: 10.1109\/CVPR.2017.106 10.1109\/CVPR.2017.106","DOI":"10.1109\/CVPR.2017.106"},{"key":"39","doi-asserted-by":"crossref","unstructured":"[39] R.Q. Charles, H. Su, M. Kaichun, and L.J. Guibas, \u201cPointNet: Deep learning on point sets for 3D classification and segmentation,\u201d IEEE Conference on Computer Vision and Pattern Recognition, pp.77-85, 2017. DOI: 10.1109\/CVPR.2017.16 10.1109\/CVPR.2017.16","DOI":"10.1109\/CVPR.2017.16"},{"key":"40","unstructured":"[40] C.R. Qi, L. Yi, H. Su, and L.J. Guibas, \u201cPointNet++: Deep hierarchical feature learning on point sets in a metric space,\u201d Neural Information Processing Systems, vol.31, pp.5105-5114, 2017."},{"key":"41","doi-asserted-by":"crossref","unstructured":"[41] S. Woo, J. Park, J.-Y. Lee, and I.S. Kweon, \u201cCBAM: Convolutional block attention module,\u201d European Conference on Computer Vision, pp.3-19, 2018. DOI: 10.1007\/978-3-030-01234-2_1 10.1007\/978-3-030-01234-2_1","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"42","doi-asserted-by":"crossref","unstructured":"[42] J. Huang, Y. Ye, Z. Liang, Y. Shan, and D. Du, \u201cDetecting as labeling: Rethinking LiDAR-camera fusion in 3D object detection,\u201d arXiv preprint arXiv:2311.07152, 2023. 10.48550\/arXiv.2311.07152","DOI":"10.1007\/978-3-031-72670-5_25"},{"key":"43","doi-asserted-by":"crossref","unstructured":"[43] H. Caesar, V. Bankiti, A.H. Lang, S. Vora, V.E. Liong, Q. Xu, A. Krishnan, Y. Pan, G. Baldan, and O. Beijbom, \u201cnuScenes: A multimodal dataset for autonomous driving,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.11618-11628, 2020. DOI: 10.1109\/CVPR42600.2020.01164 10.1109\/CVPR42600.2020.01164","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"44","doi-asserted-by":"crossref","unstructured":"[44] Z. Liu, Y. Lin, Y. Cao, H. Hu, Y. Wei, Z. Zhang, S. Lin, and B. Guo, \u201cSwin transformer: Hierarchical vision transformer using shifted windows,\u201d IEEE\/CVF International Conference on Computer Vision, pp.9992-10002, 2021. DOI: 10.1109\/ICCV48922.2021.00986 10.1109\/ICCV48922.2021.00986","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"45","unstructured":"[45] I. Loshchilov and F. Hutter, \u201cDecoupled weight decay regularization,\u201d arXiv preprint arXiv:1711.05101, 2017. 10.48550\/arXiv.1711.05101"},{"key":"46","unstructured":"[46] B. Zhu, Z. Jiang, X. Zhou, Z. Li, and G. Yu, \u201cClass-balanced grouping and sampling for point cloud 3D object detection,\u201d arXiv preprint arXiv:1908.09492, 2019. 10.48550\/arXiv.1908.09492"},{"key":"47","unstructured":"[47] P. Chen, S. Liu, H. Zhao, and J. Jia, \u201cGridMask data augmentation,\u201d arXiv preprint arXiv:2001.04086, 2020. 10.48550\/arXiv.2001.04086"},{"key":"48","doi-asserted-by":"crossref","unstructured":"[48] K. Yu, T. Tao, H. Xie, Z. Lin, T. Liang, B. Wang, P. Chen, D. Hao, Y. Wang, and X. Liang, \u201cBenchmarking the robustness of LiDAR-camera fusion for 3D object detection,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.3188-3198, 2023. DOI: 10.1109\/CVPRW59228.2023.00321 10.1109\/CVPRW59228.2023.00321","DOI":"10.1109\/CVPRW59228.2023.00321"},{"key":"49","doi-asserted-by":"crossref","unstructured":"[49] Z. Li, Z. Yu, W. Wang, A. Anandkumar, T. Lu, and J.M. Alvarez, \u201cFB-BEV: BEV representation from forward-backward view transformations,\u201d IEEE\/CVF International Conference on Computer Vision, pp.6896-6905, 2023. DOI: 10.1109\/ICCV51070.2023.00637 10.1109\/ICCV51070.2023.00637","DOI":"10.1109\/ICCV51070.2023.00637"},{"key":"50","unstructured":"[50] D. Zhang, G. Wang, R. Zhu, J. Zhao, X. Chen, S. Zhang, J. Gong, Q. Zhou, W. Zhang, N. Wang, F. Tan, H. Zhou, Z. Xu, H. Yao, C. Zhang, X. Liu, X. Di, and B. Li, \u201cSparseAD: Sparse query-centric paradigm for efficient end-to-end autonomous driving,\u201c arXiv preprint arXiv:2404.06892, 2024. 10.48550\/arXiv.2404.06892"},{"key":"51","doi-asserted-by":"crossref","unstructured":"[51] Y. Chen, J. Liu, X. Zhang, X. Qi, and J. Jia, \u201cLargeKernel3D: Scaling up kernels in 3D sparse CNNs,\u201d IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.13488-13498, 2023. DOI: 10.1109\/CVPR52729.2023.01296 10.1109\/CVPR52729.2023.01296","DOI":"10.1109\/CVPR52729.2023.01296"},{"key":"52","doi-asserted-by":"crossref","unstructured":"[52] J. Yan, Y. Liu, J. Sun, F. Jia, S. Li, T. Wang, and X. Zhang, \u201cCross modal transformer: Towards fast and robust 3D object detection,\u201d IEEE\/CVF International Conference on Computer Vision, pp.18222-18232, 2023. DOI: 10.1109\/ICCV51070.2023.01675 10.1109\/ICCV51070.2023.01675","DOI":"10.1109\/ICCV51070.2023.01675"},{"key":"53","doi-asserted-by":"crossref","unstructured":"[53] Z. Chen, Z. Li, S. Zhang, L. Fang, Q. Jiang, and F. Zhao, \u201cDeformable feature aggregation for dynamic multi-modal 3D object detection,\u201d European Conference on Computer Vision, pp.628-644, Springer Nature Switzerland, Cham, 2022. DOI: 10.1007\/978-3-031-20074-8_36 10.1007\/978-3-031-20074-8_36","DOI":"10.1007\/978-3-031-20074-8_36"},{"key":"54","doi-asserted-by":"crossref","unstructured":"[54] L. Wu, D. Wang, M. Li, Y. Xiong, R. Krishnamoorthi, Q. Liu, and V. Chandra, \u201cPathFusion: Path-consistent LiDAR-camera deep feature fusion,\u201d 2024 International Conference on 3D Vision (3DV), pp.313-323, IEEE, 2024. 10.1109\/3DV62453.2024.00106","DOI":"10.1109\/3DV62453.2024.00106"}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E108.D\/6\/E108.D_2024EDP7158\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T03:42:35Z","timestamp":1749267755000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E108.D\/6\/E108.D_2024EDP7158\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,1]]},"references-count":54,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2024edp7158","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,1]]},"article-number":"2024EDP7158"}}