{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T03:19:04Z","timestamp":1785554344415,"version":"3.56.0"},"reference-count":77,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National High Technology Research and Development Program of China","award":["2018YFE0204300"],"award-info":[{"award-number":["2018YFE0204300"]}]},{"name":"National High Technology Research and Development Program of China","award":["2022YFB2503003"],"award-info":[{"award-number":["2022YFB2503003"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62273198"],"award-info":[{"award-number":["62273198"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52072215"],"award-info":[{"award-number":["52072215"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Natural Science Foundation Program of China","award":["L241017"],"award-info":[{"award-number":["L241017"]}]},{"name":"Research Internship Program of Alibaba Group"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tpami.2025.3549711","type":"journal-article","created":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T17:37:06Z","timestamp":1741714626000},"page":"5094-5111","source":"Crossref","is-referenced-by-count":33,"title":["BEVHeight++: Toward Robust Visual Centric 3D Object Detection"],"prefix":"10.1109","volume":"47","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1800-6892","authenticated-orcid":false,"given":"Lei","family":"Yang","sequence":"first","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8526-220X","authenticated-orcid":false,"given":"Tao","family":"Tang","sequence":"additional","affiliation":[{"name":"Shenzhen Campus, Sun Yat-sen University, Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0437-5112","authenticated-orcid":false,"given":"Jun","family":"Li","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8394-8187","authenticated-orcid":false,"given":"Kun","family":"Yuan","sequence":"additional","affiliation":[{"name":"Center for Machine Learning Research, Peking University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kai","family":"Wu","sequence":"additional","affiliation":[{"name":"Tencent YouTu Lab, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Peng","family":"Chen","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9325-2391","authenticated-orcid":false,"given":"Li","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1513-8443","authenticated-orcid":false,"given":"Yi","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5650-4513","authenticated-orcid":false,"given":"Lei","family":"Li","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0034-9037","authenticated-orcid":false,"given":"Xinyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0186-3399","authenticated-orcid":false,"given":"Kaicheng","family":"Yu","sequence":"additional","affiliation":[{"name":"Westlake University, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"34493","article-title":"Vehicle-infrastructure cooperative 3D object detection via feature flow prediction","volume":"36","author":"Yu","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3374168"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02067"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02065"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02070"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00105"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3179507"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3515454"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3025077"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3286409"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3228806"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3270728"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3439557"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3431671"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3264658"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73347-5_20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IV51971.2022.9827401"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IV55152.2023.10186723"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwad121"},{"key":"ref24","first-page":"180","article-title":"Detr3D: 3D object detection from multi-view images via 3D-to-2D queries","volume-title":"Proc. Conf. Robot Learn.","author":"Wang","year":"2022"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_31"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00302"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25185"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_12"},{"key":"ref29","article-title":"Bevdet: High-performance multi-camera 3D object detection in bird-eye-view","author":"Huang","year":"2021"},{"key":"ref30","article-title":"Sts: Surround-view temporal stereo for multi-view 3D detection","author":"Wang","year":"2022"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25234"},{"key":"ref32","article-title":"Bevdet4D: Exploit temporal cues in multi-camera 3D object detection","author":"Huang","year":"2022"},{"key":"ref33","first-page":"1","article-title":"Time will tell: New outlooks and a baseline for temporal multi-view 3D object detection","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Park","year":"2023"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00335"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00953"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00214"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00208"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref40","first-page":"1","article-title":"3D object proposals for accurate object class detection","volume":"28","author":"Chen","year":"2015","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_39"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01392"},{"key":"ref43","first-page":"1","article-title":"Class-balanced grouping and sampling for point cloud 3D object detection","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. Workshops","author":"Zhu","year":"2019"},{"key":"ref44","first-page":"1","article-title":"Decoupled weight decay regularization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Loshchilov","year":"2019"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00133"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01298"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.3390\/s18103337"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794195"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00938"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58592-1_9"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00469"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00330"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3414835"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00107"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"ref56","article-title":"Detr4D: Direct multi-view 3D object detection with sparse attention","author":"Luo","year":"2022"},{"key":"ref57","first-page":"18442","article-title":"Unifying voxel-based representation with transformer for 3D object detection","volume":"35","author":"Li","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref58","first-page":"1475","article-title":"Probabilistic and geometric depth: Detecting objects in perspective","volume-title":"Proc. Conf. Robot Learn.","author":"Wang","year":"2022"},{"key":"ref59","first-page":"11703","article-title":"Monouni: A unified vehicle and infrastructure-side monocular 3D object detection network with sufficient depth clues","volume":"36","author":"Jinrang","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00886"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00310"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110517"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01211"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00377"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3074363"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3136899"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00313"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3191849"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00398"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00115"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00840"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00976"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01264"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01489"},{"key":"ref77","article-title":"SGV3D: Towards scenario generalization for vision-based roadside 3D object detection","author":"Yang","year":"2024"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/10990047\/10919014.pdf?arnumber=10919014","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T04:26:35Z","timestamp":1746678395000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10919014\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":77,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3549711","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}