{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T01:32:13Z","timestamp":1772242333809,"version":"3.50.1"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T00:00:00Z","timestamp":1751500800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T00:00:00Z","timestamp":1751500800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s40747-025-01984-9","type":"journal-article","created":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T04:42:01Z","timestamp":1751517721000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["DMformer: a transformer with denoising and multi-modal data fusion for enhancing BEV perception"],"prefix":"10.1007","volume":"11","author":[{"given":"Xuefeng","family":"Bao","sequence":"first","affiliation":[]},{"given":"Feng","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yunli","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yong","family":"Li","sequence":"additional","affiliation":[]},{"given":"Rui","family":"Tian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,3]]},"reference":[{"key":"1984_CR1","doi-asserted-by":"publisher","first-page":"2151","DOI":"10.1109\/TPAMI.2023.3333838","volume":"46","author":"H Li","year":"2023","unstructured":"Li H, Sima C, Dai J et al (2023) Delving into the devils of bird\u2019s-eye-view perception: a review, evaluation and recipe. IEEE Trans Pattern Anal Mach Intell 46:2151\u20132170","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1984_CR2","doi-asserted-by":"publisher","first-page":"10978","DOI":"10.1109\/TPAMI.2024.3449912","volume":"46","author":"Y Ma","year":"2024","unstructured":"Ma Y, Wang T, Bai X et al (2024) Vision-centric BEV perception: a survey. IEEE Trans Pattern Anal Mach Intell 46:10978\u201310997","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1984_CR3","doi-asserted-by":"crossref","unstructured":"Singh A (2023) Surround-view vision-based 3D detection for autonomous driving: a survey. In: Proc. IEEE\/CVF Int. Conf. Comput. Vis. Workshops (ICCVW), pp 3235\u20133244","DOI":"10.1109\/ICCVW60793.2023.00348"},{"key":"1984_CR4","doi-asserted-by":"publisher","first-page":"125103","DOI":"10.1016\/j.eswa.2024.125103","volume":"258","author":"J Zhao","year":"2024","unstructured":"Zhao J, Shi J, Zhuo L (2024) BEV perception for autonomous driving: state of the art and future perspectives. Expert Syst Appl 258:125103","journal-title":"Expert Syst Appl"},{"issue":"6","key":"1984_CR5","doi-asserted-by":"publisher","first-page":"7681","DOI":"10.1007\/s40747-024-01567-0","volume":"10","author":"P Shi","year":"2024","unstructured":"Shi P, Liu Z, Dong X et al (2024) CL-fusionBEV: 3D object detection method with camera-LiDAR fusion in Bird\u2019s Eye View. Complex Intell Syst 10(6):7681\u20137696","journal-title":"Complex Intell Syst"},{"issue":"2","key":"1984_CR6","doi-asserted-by":"publisher","first-page":"2825","DOI":"10.1007\/s40747-023-01295-x","volume":"10","author":"W Yan","year":"2024","unstructured":"Yan W, Liu S, Liu H et al (2024) Adaptive learning point cloud and image diversity feature fusion network for 3D object detection. Complex Intell Syst 10(2):2825\u20132837","journal-title":"Complex Intell Syst"},{"issue":"9","key":"1984_CR7","doi-asserted-by":"publisher","first-page":"16249","DOI":"10.1109\/TITS.2022.3149370","volume":"23","author":"O Natan","year":"2022","unstructured":"Natan O, Miura J (2022) Towards compact autonomous driving perception with balanced learning and multi-sensor fusion. IEEE Trans Intell Transp Syst 23(9):16249\u201316266","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"1984_CR8","doi-asserted-by":"crossref","unstructured":"Kim Y, Shin J, Kim S et al (2023) CRN: camera radar net for accurate, robust, efficient 3D perception. In: Proc. IEEE\/CVF Int. Conf. Comput. Vis., pp 17615\u201317626","DOI":"10.1109\/ICCV51070.2023.01615"},{"issue":"9","key":"1984_CR9","doi-asserted-by":"publisher","first-page":"10850","DOI":"10.1109\/TPAMI.2023.3261988","volume":"45","author":"FA Croitoru","year":"2023","unstructured":"Croitoru FA, Hondru V, Ionescu RT et al (2023) Diffusion models in vision: a survey. IEEE Trans Pattern Anal Mach Intell 45(9):10850\u201310869","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"1984_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3626235","volume":"56","author":"L Yang","year":"2023","unstructured":"Yang L, Zhang Z, Song Y et al (2023) Diffusion models: a comprehensive survey of methods and applications. ACM Comput Surv 56(4):1\u201339","journal-title":"ACM Comput Surv"},{"key":"1984_CR11","unstructured":"Chen K, Liu Y (2024) Efficient image deblurring networks based on diffusion models. arXiv preprint arXiv:2401.05907"},{"key":"1984_CR12","unstructured":"Nichol AQ, Dhariwal P (2021) Improved denoising diffusion probabilistic models. In: Proc. Int. Conf. Mach. Learn. (ICML), PMLR, pp 8162\u20138171"},{"issue":"5","key":"1984_CR13","doi-asserted-by":"publisher","first-page":"2179","DOI":"10.1007\/s40747-021-00428-4","volume":"7","author":"AE Ilesanmi","year":"2021","unstructured":"Ilesanmi AE, Ilesanmi TO (2021) Methods for image denoising using convolutional neural network: a review. Complex Intell Syst 7(5):2179\u20132198","journal-title":"Complex Intell Syst"},{"issue":"1","key":"1984_CR14","doi-asserted-by":"publisher","first-page":"4272","DOI":"10.1038\/s41598-024-52370-3","volume":"14","author":"H Xiao","year":"2024","unstructured":"Xiao H, Wang X, Wang J et al (2024) Single image super-resolution with denoising diffusion GANs. Sci Rep 14(1):4272","journal-title":"Sci Rep"},{"issue":"2","key":"1984_CR15","doi-asserted-by":"publisher","first-page":"722","DOI":"10.1109\/TITS.2020.3023541","volume":"23","author":"Y Cui","year":"2021","unstructured":"Cui Y, Chen R, Chu W et al (2021) Deep learning for image and point cloud fusion in autonomous driving: a review. IEEE Trans Intell Transp Syst 23(2):722\u2013739","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"7","key":"1984_CR16","doi-asserted-by":"publisher","first-page":"3781","DOI":"10.1109\/TIV.2023.3264658","volume":"8","author":"L Wang","year":"2023","unstructured":"Wang L, Zhang X, Song Z et al (2023) Multi-modal 3D object detection in autonomous driving: a survey and taxonomy. IEEE Trans Intell Veh 8(7):3781\u20133798","journal-title":"IEEE Trans Intell Veh"},{"key":"1984_CR17","doi-asserted-by":"crossref","unstructured":"Philion J, Fidler S (2020) Lift, splat, shoot: encoding images from arbitrary camera rigs by implicitly unprojecting to 3D. In: Proc. Eur. Conf. Comput. Vis. (ECCV), Glasgow, UK, Aug. 23\u201328, pp 194\u2013210","DOI":"10.1007\/978-3-030-58568-6_12"},{"key":"1984_CR18","doi-asserted-by":"crossref","unstructured":"Fan S, Wang Z, Huo X et al (2023) Calibration-free BEV representation for infrastructure perception. In: Proc. IEEE\/RSJ Int. Conf. Intell. Robots Syst. (IROS), pp 9008\u20139013","DOI":"10.1109\/IROS55552.2023.10341916"},{"key":"1984_CR19","doi-asserted-by":"crossref","unstructured":"Yang L, Yu K, Tang T et al (2023) BEVHeight: a robust framework for vision-based roadside 3D object detection. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 21611\u201321620","DOI":"10.1109\/CVPR52729.2023.02070"},{"key":"1984_CR20","doi-asserted-by":"crossref","unstructured":"Li Z, Wang W, Li H et al (2022) BEVFormer: learning bird\u2019s-eye-view representation from multi-camera images via spatiotemporal transformers. In: Proc. Eur. Conf. Comput. Vis. (ECCV), Cham, Switzerland, pp 1\u201318","DOI":"10.1007\/978-3-031-20077-9_1"},{"key":"1984_CR21","doi-asserted-by":"publisher","first-page":"150574","DOI":"10.1109\/ACCESS.2024.3453944","volume":"12","author":"H Bilal","year":"2024","unstructured":"Bilal H, Aslam MS, Tian Y et al (2024) Enhancing trajectory tracking and vibration control of flexible robots with hybrid fuzzy ADRC and input shaping. IEEE Access 12:150574\u2013150591","journal-title":"IEEE Access"},{"issue":"12","key":"1984_CR22","doi-asserted-by":"publisher","first-page":"1290","DOI":"10.3390\/bioengineering11121290","volume":"11","author":"H Bilal","year":"2024","unstructured":"Bilal H, Tian Y, Ali A et al (2024) An intelligent approach for early and accurate prediction of cardiac disease using hybrid artificial intelligence techniques. Bioengineering 11(12):1290","journal-title":"Bioengineering"},{"key":"1984_CR23","doi-asserted-by":"crossref","unstructured":"Bilal H, Obaidat MS, Aslam MS et al (2024) Online fault diagnosis of industrial robot using IoRT and hybrid deep learning techniques: an experimental approach. IEEE Internet Things J 11(19):31422\u201331437","DOI":"10.1109\/JIOT.2024.3418352"},{"issue":"5","key":"1984_CR24","doi-asserted-by":"publisher","first-page":"7311","DOI":"10.1007\/s40747-024-01544-7","volume":"10","author":"MS Aslam","year":"2024","unstructured":"Aslam MS, Bilal H, Chang W et al (2024) Indirect adaptive observer control (I-AOC) design for truck\u2013trailer model based on T-S fuzzy system with unknown nonlinear function. Complex Intell Syst 10(5):7311\u20137331","journal-title":"Complex Intell Syst"},{"key":"1984_CR25","first-page":"14","volume":"1","author":"H Bilal","year":"2024","unstructured":"Bilal H, Ahmed F, Aslam MS et al (2024) A blockchain-enabled approach for privacy-protected data sharing in Internet of Robotic Things networks. Hum-Centric Comput Inf Sci 1:14","journal-title":"Hum-Centric Comput Inf Sci"},{"key":"1984_CR26","doi-asserted-by":"crossref","unstructured":"Ding Y, Wang A, Zhang L (2024) Multidimensional semantic disentanglement network for clothes-changing person re-identification. In: Proc. Int. Conf. Multimedia Retrieval (ICMR), pp 1025\u20131033","DOI":"10.1145\/3652583.3658037"},{"issue":"5","key":"1984_CR27","doi-asserted-by":"publisher","first-page":"4337","DOI":"10.1007\/s11760-024-03076-6","volume":"18","author":"Y Ding","year":"2024","unstructured":"Ding Y, Mao R, Du G et al (2024) Clothes-eraser: clothing-aware controllable disentanglement for clothes-changing person re-identification. SIViP 18(5):4337\u20134348","journal-title":"SIViP"},{"issue":"27","key":"1984_CR28","doi-asserted-by":"publisher","first-page":"69693","DOI":"10.1007\/s11042-024-18440-4","volume":"83","author":"Y Ding","year":"2024","unstructured":"Ding Y, Wu Y, Wang A et al (2024) Disentangled body features for clothing change person re-identification. Multimed Tools Appl 83(27):69693\u201369714","journal-title":"Multimed Tools Appl"},{"issue":"1","key":"1984_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s40747-024-01646-2","volume":"11","author":"Y Ding","year":"2025","unstructured":"Ding Y, Li J, Wang H et al (2025) Attention-enhanced multimodal feature fusion network for clothes-changing person re-identification. Complex Intell Syst 11(1):1\u201315","journal-title":"Complex Intell Syst"},{"key":"1984_CR30","unstructured":"Huang J, Huang G, Zhu Z et al (2021) BEVDet: high-performance multi-camera 3D object detection in bird-eye-view. arXiv preprint arXiv:2112.11790"},{"key":"1984_CR31","unstructured":"Huang J, Huang G (2022) BEVDet4D: exploit temporal cues in multi-camera 3D object detection. arXiv preprint arXiv:2203.17054"},{"key":"1984_CR32","doi-asserted-by":"crossref","unstructured":"Li Y, Ge Z, Yu G et al (2023) BEVDepth: acquisition of reliable depth for multi-view 3D object detection. In: Proc. AAAI Conf. Artif. Intell., vol 37, p 2","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"1984_CR33","doi-asserted-by":"crossref","unstructured":"Zhou Y, Tuzel O (2018) VoxelNet: end-to-end learning for point cloud based 3D object detection. In: Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 4490\u20134499","DOI":"10.1109\/CVPR.2018.00472"},{"issue":"10","key":"1984_CR34","doi-asserted-by":"publisher","first-page":"3337","DOI":"10.3390\/s18103337","volume":"18","author":"Y Yan","year":"2018","unstructured":"Yan Y, Mao Y, Li B (2018) SECOND: sparsely embedded convolutional detection. Sensors 18(10):3337","journal-title":"Sensors"},{"key":"1984_CR35","doi-asserted-by":"crossref","unstructured":"Lang AH, Vora S, Caesar H et al (2019) PointPillars: fast encoders for object detection from point clouds. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 12697\u201312705","DOI":"10.1109\/CVPR.2019.01298"},{"key":"1984_CR36","doi-asserted-by":"crossref","unstructured":"Deng J, Shi S, Li P et al (2021) Voxel R-CNN: Towards high performance voxel-based 3D object detection. In: Proc. AAAI Conf. Artif. Intell., vol 35, no 2, pp 1201\u20131209","DOI":"10.1609\/aaai.v35i2.16207"},{"key":"1984_CR37","doi-asserted-by":"crossref","unstructured":"Liu Y, Wang T, Zhang X et al (2022) PETR: position embedding transformation for multi-view 3D object detection. In: Proc. Eur. Conf. Comput. Vis. (ECCV). Springer Nature, Cham, pp 531\u2013548","DOI":"10.1007\/978-3-031-19812-0_31"},{"key":"1984_CR38","doi-asserted-by":"crossref","unstructured":"Huang KC, Wu TH, Su HT et al (2022) MonoDTR: monocular 3D object detection with depth-aware transformer. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 4012\u20134021","DOI":"10.1109\/CVPR52688.2022.00398"},{"key":"1984_CR39","doi-asserted-by":"crossref","unstructured":"Zhou B, Kr\u00e4henb\u00fchl P (2022) Cross-view transformers for real-time map-view semantic segmentation. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 13760\u201313769","DOI":"10.1109\/CVPR52688.2022.01339"},{"key":"1984_CR40","doi-asserted-by":"crossref","unstructured":"Gong S, Ye X, Tan X et al (2022) GITNet: geometric prior-based transformation for bird\u2019s-eye-view segmentation. In: Proc. Eur. Conf. Comput. Vis. (ECCV). Springer Nature, Cham, pp 396\u2013411","DOI":"10.1007\/978-3-031-19769-7_23"},{"key":"1984_CR41","doi-asserted-by":"crossref","unstructured":"Shi P, Dong X, Ge R et al (2025) Dp-M3D: monocular 3D object detection algorithm with depth perception capability. Knowl-Based Syst, art. 113539","DOI":"10.1016\/j.knosys.2025.113539"},{"key":"1984_CR42","doi-asserted-by":"crossref","unstructured":"Dong X, Shi P, Liang T et al (2024) CTAFFNet: CNN-transformer adaptive feature fusion object detection algorithm for complex traffic scenarios. Transp Res Rec, art. 03611981241258753","DOI":"10.1177\/03611981241258753"},{"key":"1984_CR43","doi-asserted-by":"crossref","unstructured":"Dong X, Shi P, Qi H et al (2024) TS-BEV: BEV object detection algorithm based on temporal-spatial feature fusion. Displays 84, art. 102814","DOI":"10.1016\/j.displa.2024.102814"},{"key":"1984_CR44","doi-asserted-by":"crossref","unstructured":"Sindagi VA, Zhou Y, Tuzel O (2019) MVX-Net: multimodal VoxelNet for 3D object detection. In: Proc. Int. Conf. Robot. Autom. (ICRA), pp 7276\u20137282","DOI":"10.1109\/ICRA.2019.8794195"},{"key":"1984_CR45","doi-asserted-by":"crossref","unstructured":"Vora S, Lang AH, Helou B et al (2020) PointPainting: sequential fusion for 3D object detection. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 4604\u20134612","DOI":"10.1109\/CVPR42600.2020.00466"},{"key":"1984_CR46","doi-asserted-by":"crossref","unstructured":"Yoo JH, Kim Y, Kim J et al (2020) 3D-CVF: generating joint camera and LiDAR features using cross-view spatial feature fusion for 3D object detection. In: Proc. Eur. Conf. Comput. Vis. (ECCV), Glasgow, UK, Aug. 23\u201328, pp 720\u2013736","DOI":"10.1007\/978-3-030-58583-9_43"},{"key":"1984_CR47","doi-asserted-by":"crossref","unstructured":"Bai X, Hu Z, Zhu X et al (2022) TransFusion: robust LiDAR-camera fusion for 3D object detection with transformers. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 1090\u20131099","DOI":"10.1109\/CVPR52688.2022.00116"},{"key":"1984_CR48","doi-asserted-by":"crossref","unstructured":"Liu Z, Tang H, Amini A et al (2023) BEVFusion: multi-task multi-sensor fusion with unified bird\u2019s-eye view representation. In: Proc. IEEE Int. Conf. Robot. Autom. (ICRA), pp 2774\u20132781","DOI":"10.1109\/ICRA48891.2023.10160968"},{"issue":"6","key":"1984_CR49","doi-asserted-by":"publisher","first-page":"1195","DOI":"10.1093\/logcom\/exac018","volume":"32","author":"LA Dennis","year":"2022","unstructured":"Dennis LA, Fu Y, Slavkovik M (2022) Markov chain model representation of information diffusion in social networks. J Log Comput 32(6):1195\u20131211","journal-title":"J Log Comput"},{"key":"1984_CR50","unstructured":"Park SW, Lee K, Kwon J (2021) Neural Markov controlled SDE: Stochastic optimization for continuous-time data. In: Proc. Int. Conf. Learn. Represent. (ICLR)"},{"key":"1984_CR51","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho J, Jain A, Abbeel P (2020) Denoising diffusion probabilistic models. Adv Neural Inf Process Syst 33:6840\u20136851","journal-title":"Adv Neural Inf Process Syst"},{"key":"1984_CR52","unstructured":"Song Y, Ermon S (2019) Generative modeling by estimating gradients of the data distribution. In: Advances in neural information processing systems, vol 32"},{"issue":"11","key":"1984_CR53","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M et al (2020) Generative adversarial networks. Commun ACM 63(11):139\u2013144","journal-title":"Commun ACM"},{"key":"1984_CR54","unstructured":"Rezende D, Mohamed S (2015) Variational inference with normalizing flows. In: Proc. Int. Conf. Mach. Learn. (ICML), PMLR, pp 1530\u20131538"},{"key":"1984_CR55","unstructured":"Vaswani A (2017) Attention is all you need. In: Advances in neural information processing systems, vol 30"},{"key":"1984_CR56","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S et al (2016) Deep residual learning for image recognition. In: Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1984_CR57","doi-asserted-by":"crossref","unstructured":"Lin TY, Doll\u00e1r P, Girshick R et al (2017) Feature pyramid networks for object detection. In: Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"1984_CR58","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-Net: convolutional networks for biomedical image segmentation. In: Proc. Med. Image Comput. Comput.-Assist. Intervent. (MICCAI), Munich, Germany, Oct. 5\u20139, pp 234\u2013241","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"1984_CR59","doi-asserted-by":"crossref","unstructured":"Sun D, Yang X, Liu MY et al (2018) PWC-Net: CNNs for optical flow using pyramid, warping, and cost volume. In: Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 8934\u20138943","DOI":"10.1109\/CVPR.2018.00931"},{"key":"1984_CR60","doi-asserted-by":"crossref","unstructured":"Caesar H, Bankiti V, Lang AH et al (2020) nuScenes: a multimodal dataset for autonomous driving. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 11621\u201311631","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"1984_CR61","doi-asserted-by":"crossref","unstructured":"Yin T, Zhou X, Kr\u00e4henb\u00fchl P (2021) Center-based 3D object detection and tracking. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 11779\u201311788","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"1984_CR62","doi-asserted-by":"crossref","unstructured":"Li Y, Bao H, Ge Z et al (2023) BEVStereo: enhancing depth estimation in multi-view 3D object detection with temporal stereo. In: Proc. AAAI Conf. Artif. Intell., vol 37, no 2, pp 1486\u20131494","DOI":"10.1609\/aaai.v37i2.25234"},{"key":"1984_CR63","first-page":"1","volume":"1","author":"Y Li","year":"2024","unstructured":"Li Y, Huang B, Chen Z et al (2024) Fast-BEV: a fast and strong bird\u2019s-eye view perception baseline. IEEE Trans Pattern Anal Mach Intell 1:1\u201314","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1984_CR64","doi-asserted-by":"crossref","unstructured":"Liu H, Teng Y, Lu T et al (2023) SparseBEV: high-performance sparse 3D object detection from multi-camera videos. In: Proc. IEEE\/CVF Int. Conf. Comput. Vis. (ICCV), pp 18580\u201318590","DOI":"10.1109\/ICCV51070.2023.01703"},{"key":"1984_CR65","unstructured":"Yang L, Tang T, Li J et al (2023) BEVHeight++: toward robust visual centric 3D object detection. arXiv preprint arXiv:2309.16179"},{"key":"1984_CR66","doi-asserted-by":"crossref","unstructured":"Jiang Y, Zhang L, Miao Z et al (2023) PolarFormer: multi-camera 3D object detection with polar transformer. In: Proc. AAAI Conf. Artif. Intell., vol 37, no 1, pp 1042\u20131050","DOI":"10.1609\/aaai.v37i1.25185"},{"key":"1984_CR67","doi-asserted-by":"crossref","unstructured":"Monninger T, Dokkadi V, Anwar MZ et al (2024) TempBEV: improving learned BEV encoders with combined image and BEV space temporal aggregation. arXiv preprint arXiv:2404.11803","DOI":"10.1109\/IROS58592.2024.10801646"},{"key":"1984_CR68","doi-asserted-by":"crossref","unstructured":"Chen Z, Li Z, Zhang S et al (2022) AutoAlign: pixel-instance feature aggregation for multi-modal 3D object detection. arXiv preprint arXiv:2201.06493","DOI":"10.24963\/ijcai.2022\/116"},{"key":"1984_CR69","doi-asserted-by":"crossref","unstructured":"Yu H, Luo Y, Shu M et al (2022) DAIR-V2X: a large-scale dataset for vehicle-infrastructure cooperative 3D object detection. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 21361\u201321370","DOI":"10.1109\/CVPR52688.2022.02067"},{"issue":"1","key":"1984_CR70","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s11760-024-03598-z","volume":"19","author":"P Shi","year":"2025","unstructured":"Shi P, Pan Y, Yang A (2025) SS-BEV: multi-camera BEV object detection based on multi-scale spatial structure understanding. SIViP 19(1):1\u201313","journal-title":"SIViP"},{"key":"1984_CR71","doi-asserted-by":"crossref","unstructured":"Yin J, Shen J, Chen R et al (2024) IS-Fusion: instance-scene collaborative fusion for multimodal 3D object detection. In: Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR), pp 14905\u201314915","DOI":"10.1109\/CVPR52733.2024.01412"},{"key":"1984_CR72","doi-asserted-by":"crossref","unstructured":"Li Z, Zhao X, Bian J et al (2025) V-Fusion: 2D detection-enhanced multimodal 3D BEV object detection. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. (ICASSP), pp 1\u20135","DOI":"10.1109\/ICASSP49660.2025.10889489"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01984-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-025-01984-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01984-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T01:30:54Z","timestamp":1757208654000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-025-01984-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,3]]},"references-count":72,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["1984"],"URL":"https:\/\/doi.org\/10.1007\/s40747-025-01984-9","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,3]]},"assertion":[{"value":"21 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"368"}}