{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T21:49:47Z","timestamp":1774043387418,"version":"3.50.1"},"reference-count":41,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012659","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62371013"],"award-info":[{"award-number":["62371013"]}],"id":[{"id":"10.13039\/501100012659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1109\/lra.2026.3671538","type":"journal-article","created":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T20:00:15Z","timestamp":1773086415000},"page":"5358-5365","source":"Crossref","is-referenced-by-count":0,"title":["StreamCMT: Prior-Guided Multimodal Temporal Fusion for Sparse 3D Object Detection"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-6145-1114","authenticated-orcid":false,"given":"Yanliang","family":"Huang","sequence":"first","affiliation":[{"name":"College of Robotics, Beijing Union University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8343-6875","authenticated-orcid":false,"given":"Yuansheng","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Beijing Union University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.15961"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00466"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00327"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341791"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01790-1"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160968"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0757"},{"key":"ref8","first-page":"180","article-title":"DETR3D: 3D object detection from multi-view images via 3D-to-2D queries","volume-title":"Proc. Conf. Robot Learn.","author":"Wang","year":"2022"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00116"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01675"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_31"},{"key":"ref12","first-page":"1992","article-title":"DeepInteraction: 3D object detection via modality interaction","volume-title":"Proc. Adv. Neural. Inf. Process. Syst.","volume":"35","author":"Yang","year":"2022"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3515454"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01710"},{"key":"ref15","article-title":"BEVDet4D: Exploit temporal cues in multi-camera 3D object detection","author":"Huang","year":"2022"},{"key":"ref16","article-title":"Time will tell: New outlooks and a baseline for temporal multi-view 3d object detection","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Park","year":"2023"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00302"},{"key":"ref18","article-title":"Sparse4D: Multi-view 3D object detection with sparse spatial-temporal fusion","author":"Lin","year":"2022"},{"key":"ref19","article-title":"Sparse4D V2: Recurrent temporal fusion with sparse model","author":"Lin","year":"2023"},{"key":"ref20","article-title":"Sparse4D V3: Advancing end-to-end 3D detection and tracking","author":"Lin","year":"2023"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00335"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3535960"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.3390\/s18103337"},{"key":"ref27","article-title":"Decoupled weight decay regularization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Loshchilov","year":"2019"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2017.58"},{"key":"ref29","article-title":"Class-balanced grouping and sampling for point cloud 3D object detection","author":"Zhu","year":"2019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01703"},{"key":"ref32","article-title":"RoPETR: Improving temporal camera-only 3D detection by integrating enhanced rotary position embedding","author":"Ji","year":"2025"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01296"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00535"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00771"},{"key":"ref37","first-page":"16494","article-title":"Multimodal virtual point 3D detection","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Yin","year":"2021"},{"key":"ref38","article-title":"BEVFusion4D: Learning LiDAR-camera fusion under Birds-Eye-View via cross-modality guidance and temporal aggregation","author":"Cai","year":"2023"},{"key":"ref39","article-title":"FusionFormer: A multi-sensory fusion in Birds-Eye-View and temporal consistent transformer for 3D object detection","author":"Hu","year":"2023"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3609348"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IROS60139.2025.11246316"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11435997\/11425771.pdf?arnumber=11425771","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T20:00:29Z","timestamp":1774036829000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11425771\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":41,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/lra.2026.3671538","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5]]}}}