{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T08:51:59Z","timestamp":1767084719122,"version":"3.37.3"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T00:00:00Z","timestamp":1721088000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T00:00:00Z","timestamp":1721088000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s10489-024-05627-3","type":"journal-article","created":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T10:04:50Z","timestamp":1721124290000},"page":"9412-9428","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["DMFusion: LiDAR-camera fusion framework with depth merging and temporal aggregation"],"prefix":"10.1007","volume":"54","author":[{"given":"Xinyi","family":"Yu","sequence":"first","affiliation":[]},{"given":"Ke","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6916-974X","authenticated-orcid":false,"given":"Linlin","family":"Ou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,16]]},"reference":[{"key":"5627_CR1","doi-asserted-by":"crossref","unstructured":"Chen X, Zhang T, Wang Y, Wang Y, Zhao H (2023) Futr3d: A unified sensor fusion framework for 3d detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 172\u2013181","DOI":"10.1109\/CVPRW59228.2023.00022"},{"key":"5627_CR2","doi-asserted-by":"crossref","unstructured":"Li Y, Yu AW, Meng T, Caine B, Ngiam J, Peng D, Shen J, Lu Y, Zhou D, Le QV et al (2022) Deepfusion: Lidar-camera deep fusion for multi-modal 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17182\u201317191","DOI":"10.1109\/CVPR52688.2022.01667"},{"key":"5627_CR3","doi-asserted-by":"crossref","unstructured":"Bai X, Hu Z, Zhu X, Huang Q, Chen Y, Fu H, Tai CL (2022) Transfusion: Robust lidar-camera fusion for 3d object detection with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1090\u20131099","DOI":"10.1109\/CVPR52688.2022.00116"},{"key":"5627_CR4","first-page":"10421","volume":"35","author":"T Liang","year":"2022","unstructured":"Liang T, Xie H, Yu K, Xia Z, Lin Z, Wang Y, Tang T, Wang B, Tang Z (2022) Bevfusion: A simple and robust lidar-camera fusion framework. Adv Neural Inf Process Syst 35:10421\u201310434","journal-title":"Adv Neural Inf Process Syst"},{"key":"5627_CR5","doi-asserted-by":"crossref","unstructured":"Liu Z, Tang H, Amini A, Yang X, Mao H, Rus DL, Han S (2023) Bevfusion: Multi-task multi-sensor fusion with unified bird\u2019s-eye view representation. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 2774\u20132781. IEEE","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"5627_CR6","first-page":"18442","volume":"35","author":"Y Li","year":"2022","unstructured":"Li Y, Chen Y, Qi X, Li Z, Sun J, Jia J (2022) Unifying voxel-based representation with transformer for 3d object detection. Adv Neural Inf Process Syst 35:18442\u201318455","journal-title":"Adv Neural Inf Process Syst"},{"key":"5627_CR7","doi-asserted-by":"publisher","unstructured":"Philion J, Fidler S (2020) Lift, Splat, Shoot: Encoding Images From Arbitrary Camera Rigs by Implicitly Unprojecting to 3D, pp. 194\u2013210. https:\/\/doi.org\/10.1007\/978-3-030-58568-6_12","DOI":"10.1007\/978-3-030-58568-6_12"},{"key":"5627_CR8","doi-asserted-by":"crossref","unstructured":"Zhou S, Liu W, Hu C, Zhou S, Ma C (2023) Unidistill: A universal cross-modality knowledge distillation framework for 3d object detection in bird\u2019s-eye view. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5116\u20135125","DOI":"10.1109\/CVPR52729.2023.00495"},{"key":"5627_CR9","unstructured":"Cai H, Zhang Z, Zhou Z, Li Z, Ding W, Zhao J (2023) Bevfusion4d: Learning lidar-camera fusion under bird\u2019s-eye-view via cross-modality guidance and temporal aggregation. arXiv:2303.17099"},{"key":"5627_CR10","doi-asserted-by":"publisher","first-page":"1477","DOI":"10.1609\/aaai.v37i2.25233","volume":"37","author":"Y Li","year":"2023","unstructured":"Li Y, Ge Z, Yu G, Yang J, Wang Z, Shi Y, Sun J, Li Z (2023) Bevdepth: Acquisition of reliable depth for multi-view 3d object detection. Proceedings of the AAAI Conference on Artificial Intelligence 37:1477\u20131485","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"5627_CR11","doi-asserted-by":"crossref","unstructured":"Zeng Y, Zhang D, Wang C, Miao Z, Liu T, Zhan X, Hao D, Ma C (2022) Lift: Learning 4d lidar image fusion transformer for 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17172\u201317181","DOI":"10.1109\/CVPR52688.2022.01666"},{"key":"5627_CR12","doi-asserted-by":"crossref","unstructured":"Piergiovanni A, Casser V, Ryoo MS, Angelova A (2021) 4d-net for learned multi-modal alignment. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15435\u201315445","DOI":"10.1109\/ICCV48922.2021.01515"},{"key":"5627_CR13","doi-asserted-by":"publisher","unstructured":"Shi S, Wang X, Li H (2019) Pointrcnn: 3d object proposal generation and detection from point cloud. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/doi.org\/10.1109\/cvpr.2019.00086","DOI":"10.1109\/cvpr.2019.00086"},{"key":"5627_CR14","unstructured":"Qi CR, Yi L, Su H, Guibas LJ (2017) Pointnet++: Deep hierarchical feature learning on point sets in a metric space. Advances in neural information processing systems 30"},{"key":"5627_CR15","doi-asserted-by":"crossref","unstructured":"Shi W, Rajkumar R (2020) Point-gnn: Graph neural network for 3d object detection in a point cloud. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1711\u20131719","DOI":"10.1109\/CVPR42600.2020.00178"},{"key":"5627_CR16","doi-asserted-by":"publisher","unstructured":"Yan Y, Mao Y, Li B (2018) Second: Sparsely embedded convolutional detection. Sensors 3337. https:\/\/doi.org\/10.3390\/s18103337","DOI":"10.3390\/s18103337"},{"key":"5627_CR17","doi-asserted-by":"publisher","unstructured":"Lang AH, Vora S, Caesar H, Zhou L, Yang J, Beijbom O (2019) Pointpillars: Fast encoders for object detection from point clouds. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/doi.org\/10.1109\/cvpr.2019.01298","DOI":"10.1109\/cvpr.2019.01298"},{"key":"5627_CR18","doi-asserted-by":"crossref","unstructured":"Sheng H, Cai S, Liu Y, Deng B, Huang J, Hua XS, Zhao MJ (2021) Improving 3d object detection with channel-wise transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2743\u20132752","DOI":"10.1109\/ICCV48922.2021.00274"},{"key":"5627_CR19","first-page":"34899","volume":"35","author":"Z Tian","year":"2022","unstructured":"Tian Z, Chu X, Wang X, Wei X, Shen C (2022) Fully convolutional one-stage 3d object detection on lidar range images. Adv Neural Inf Process Syst 35:34899\u201334911","journal-title":"Adv Neural Inf Process Syst"},{"key":"5627_CR20","doi-asserted-by":"publisher","unstructured":"Fan L, Xiong X, Wang F, Wang N, Zhang Z (2021) Rangedet: In defense of range view for lidar-based 3d object detection. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). https:\/\/doi.org\/10.1109\/iccv48922.2021.00291","DOI":"10.1109\/iccv48922.2021.00291"},{"key":"5627_CR21","doi-asserted-by":"publisher","unstructured":"Meyer GP, Laddha A, Kee E, Vallespi-Gonzalez C, Wellington CK (2019) Lasernet: An efficient probabilistic 3d object detector for autonomous driving. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/doi.org\/10.1109\/cvpr.2019.01296","DOI":"10.1109\/cvpr.2019.01296"},{"key":"5627_CR22","doi-asserted-by":"publisher","unstructured":"Wang T, Zhu X, Pang J, Lin D (2021) Fcos3d: Fully convolutional one-stage monocular 3d object detection. In: 2021 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW). https:\/\/doi.org\/10.1109\/iccvw54120.2021.00107","DOI":"10.1109\/iccvw54120.2021.00107"},{"key":"5627_CR23","doi-asserted-by":"publisher","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-End Object Detection with Transformers, pp. 213\u2013229. https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"5627_CR24","unstructured":"Wang Y, Guizilini VC, Zhang T, Wang Y, Zhao H, Solomon J (2022) Detr3d: 3d object detection from multi-view images via 3d-to-2d queries. In: Conference on Robot Learning, pp. 180\u2013191. PMLR"},{"key":"5627_CR25","doi-asserted-by":"crossref","unstructured":"Wang YL, Zhang X, Sun J (2022) Petr: Position embedding transformation for multi-view 3d object detection. In: European Conference on Computer Vision, pp. 531\u2013548. Springer","DOI":"10.1007\/978-3-031-19812-0_31"},{"key":"5627_CR26","doi-asserted-by":"crossref","unstructured":"Liu Y, Yan J, Jia F, Gao SLA, Wang T, Zhang X (2023) Petrv2: A unified framework for 3d perception from multi-camera images. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3262\u20133272","DOI":"10.1109\/ICCV51070.2023.00302"},{"key":"5627_CR27","doi-asserted-by":"publisher","unstructured":"Reading C, Harakeh A, Chae J, Waslander SL (2021) Categorical depth distribution network for monocular 3d object detection. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/doi.org\/10.1109\/cvpr46437.2021.00845","DOI":"10.1109\/cvpr46437.2021.00845"},{"key":"5627_CR28","unstructured":"Roddick T, Kendall A, Cipolla R (2018) Orthographic feature transform for monocular 3d object detection. British Machine Vision Conference"},{"key":"5627_CR29","doi-asserted-by":"publisher","unstructured":"Li Z, Wang W, Li H, Xie E, Sima C, Lu T, Qiao Y, Dai J (2022) Bevformer: Learning bird\u2019s-eye-view representation from multi-camera images via spatiotemporal transformers. In: Lecture Notes in Computer Science, Computer Vision\u2013ECCV 2022, pp. 1\u201318. https:\/\/doi.org\/10.1007\/978-3-031-20077-9_1","DOI":"10.1007\/978-3-031-20077-9_1"},{"key":"5627_CR30","doi-asserted-by":"crossref","unstructured":"Yang C, Chen Y, Tian H, Tao C, Zhu X, Zhang Z, Huang G, Li H, Qiao Y, Lu L et al (2023) Bevformerv2: Adapting modern image backbones to bird\u2019s-eye-view recognition via perspective supervision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17830\u201317839","DOI":"10.1109\/CVPR52729.2023.01710"},{"key":"5627_CR31","doi-asserted-by":"publisher","unstructured":"Vora S, Lang AH, Helou B, Beijbom O (2020) Pointpainting: Sequential fusion for 3d object detection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/doi.org\/10.1109\/cvpr42600.2020.00466","DOI":"10.1109\/cvpr42600.2020.00466"},{"key":"5627_CR32","doi-asserted-by":"crossref","unstructured":"Wang C, Ma C, Yang MZ (2021) Pointaugmenting: Cross-modal augmentation for 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11794\u201311803","DOI":"10.1109\/CVPR46437.2021.01162"},{"key":"5627_CR33","doi-asserted-by":"crossref","unstructured":"Huang T, Liu Z, Chen X, Bai X (2020) Epnet: Enhancing point features with image semantics for 3d object detection. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XV 16, pp. 35\u201352. Springer","DOI":"10.1007\/978-3-030-58555-6_3"},{"key":"5627_CR34","unstructured":"Yang Z, Chen J, Miao Z, Li W, Zhu X, Zhang L (2022) Deepinteraction: 3d object detection via modality interaction. Advances in Neural Information Processing Systems 35"},{"key":"5627_CR35","unstructured":"Chen S, Wang X, Cheng T, Zhang Q, Huang C, Liu W (2022) Polar parametrization for vision-based surround-view 3d detection. arXiv:2206.10965"},{"key":"5627_CR36","doi-asserted-by":"crossref","unstructured":"Gu W, Ai R, Liu J, Fan L, Cao D, Zhang K (2022) Application of dynamic deformable attention in bird\u2019s-eye-view detection. IEEE Journal of Radio Frequency Identification 6:886\u2013890","DOI":"10.1109\/JRFID.2022.3210696"},{"key":"5627_CR37","doi-asserted-by":"crossref","unstructured":"Qin Z, Chen J, Chen C, Chen X, Li X (2023) Unifusion: Unified multi-view fusion transformer for spatial-temporal representation in bird\u2019s-eye-view, 8690\u20138699","DOI":"10.1109\/ICCV51070.2023.00798"},{"key":"5627_CR38","unstructured":"Huang J, Huang G (2022) Bevdet4d: Exploit temporal cues in multi-camera 3d object detection. arXiv:2203.17054"},{"issue":"11","key":"5627_CR39","doi-asserted-by":"publisher","first-page":"4546","DOI":"10.1109\/TIV.2023.3275993","volume":"8","author":"JL Zhiqiang Cao","year":"2023","unstructured":"Zhiqiang Cao JL, Yang J, Liu X, Yang Y, Qu Z (2023) Bird\u2019s-eye-view semantic segmentation with two-stream compact depth transformation and feature rectification. IEEE Transactions on Intelligent Vehicles 8(11):4546\u20134558. https:\/\/doi.org\/10.1109\/TIV.2023.3275993","journal-title":"IEEE Transactions on Intelligent Vehicles"},{"key":"5627_CR40","doi-asserted-by":"publisher","first-page":"1486","DOI":"10.1609\/aaai.v37i2.25234","volume":"37","author":"Y Li","year":"2023","unstructured":"Li Y, Bao H, Ge Z, Yang J, Sun J, Li Z (2023) Bevstereo: Enhancing depth estimation in multi-view 3d object detection with temporal stereo. Proceedings of the AAAI Conference on Artificial Intelligence 37:1486\u20131494","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"issue":"4","key":"5627_CR41","doi-asserted-by":"publisher","first-page":"10478","DOI":"10.1109\/LRA.2022.3191849","volume":"7","author":"Z Zhou","year":"2022","unstructured":"Zhou Z, Du L, Ye X, Zou Z, Tan X, Zhang L, Xue X, Feng J (2022) Sgm3d: Stereo guided monocular 3d object detection. IEEE Robotics and Automation Letters 7(4):10478\u201310485","journal-title":"IEEE Robotics and Automation Letters"},{"key":"5627_CR42","doi-asserted-by":"publisher","unstructured":"Yin T, Zhou X, Philipp K (2021) Center-based 3d object detection and tracking. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/doi.org\/10.1109\/cvpr46437.2021.01161","DOI":"10.1109\/cvpr46437.2021.01161"},{"key":"5627_CR43","doi-asserted-by":"crossref","unstructured":"Zhou Y, Tuzel O (2018) Voxelnet: End-to-end learning for point cloud based 3d object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4490\u20134499","DOI":"10.1109\/CVPR.2018.00472"},{"key":"5627_CR44","doi-asserted-by":"crossref","unstructured":"Koh J, Lee J, Lee Y, Kim J, Choi JW (2023) Mgtanet: Encoding sequential lidar points using long short-term motion-guided temporal attention for 3d object detection. Proceedings of the AAAI Conference on Artificial Intelligence 37:1179\u20131187","DOI":"10.1609\/aaai.v37i1.25200"},{"key":"5627_CR45","doi-asserted-by":"crossref","unstructured":"Lin TY, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"5627_CR46","doi-asserted-by":"publisher","unstructured":"Caesar H, Bankiti V, Lang AH, Vora S, Liong VE, Xu Q, Krishnan A, Pan Y, Baldan G, Beijbom O (2020) nuscenes: A multimodal dataset for autonomous driving. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/doi.org\/10.1109\/cvpr42600.2020.01164","DOI":"10.1109\/cvpr42600.2020.01164"},{"key":"5627_CR47","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L al (2019) Pytorch: An imperative style, high-performance deep learning library"},{"key":"5627_CR48","unstructured":"Contributors M (2020) MMDetection3D: OpenMMLab next-generation platform for general 3D object detection. https:\/\/github.com\/open-mmlab\/mmdetection3d"},{"key":"5627_CR49","doi-asserted-by":"crossref","unstructured":"Liang T, Chu X, Liu Y, Wang Y, Tang Z, Chu W, Chen J, HaibinLing (2022) Cbnet: A composite backbone network architecture for object detection. IEEE Trans Image Process 31:6893\u20136906","DOI":"10.1109\/TIP.2022.3216771"},{"key":"5627_CR50","doi-asserted-by":"crossref","unstructured":"Chen Z, Li Z, Zhang S, Fang L, Jiang Q, Zhao F (2022) Autoalignv2: Deformable feature aggregation for dynamic multi-modal 3d object detection. arXiv:2207.10316","DOI":"10.1007\/978-3-031-20074-8_36"},{"key":"5627_CR51","doi-asserted-by":"publisher","unstructured":"Xu S, Zhou D, Fang J, Yin J, Bin Z, Zhang L (2021) Fusionpainting: Multimodal fusion with adaptive attention for 3d object detection. In: 2021 IEEE International Intelligent Transportation Systems Conference (ITSC) . https:\/\/doi.org\/10.1109\/itsc48978.2021.9564951","DOI":"10.1109\/itsc48978.2021.9564951"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05627-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-05627-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05627-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,15]],"date-time":"2024-08-15T13:18:56Z","timestamp":1723727936000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-05627-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,16]]},"references-count":51,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["5627"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-05627-3","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2024,7,16]]},"assertion":[{"value":"16 June 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 July 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that we do not have any commercial or associative interest that represents a conflict of interest in connection with the work submitted. The authors declare that they have no conflicts of interest to this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}},{"value":"Informed consent was obtained from all human participants involved in the study","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical statement"}}]}}