{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T03:28:27Z","timestamp":1777865307177,"version":"3.51.4"},"reference-count":59,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62125603,62321005,62336004"],"award-info":[{"award-number":["62125603,62321005,62336004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L247009"],"award-info":[{"award-number":["L247009"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.02446","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"26360-26370","source":"Crossref","is-referenced-by-count":0,"title":["EmbodiedOcc: Embodied 3D Occupancy Prediction for Vision-Based Online Scene Understanding"],"prefix":"10.1109","author":[{"given":"Yuqi","family":"Wu","sequence":"first","affiliation":[{"name":"Tsinghua University,Department of Automation,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenzhao","family":"Zheng","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sicheng","family":"Zuo","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuanhui","family":"Huang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Zhou","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiwen","family":"Lu","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00039"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00396"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00481"},{"key":"ref5","article-title":"Compact 3d gaussian splatting for dense visual slam","author":"Deng","year":"2024","journal-title":"arXiv preprint"},{"key":"ref6","article-title":"Deeper into self-supervised monocular indoor depth estimation","author":"Fan","year":"2023","journal-title":"arXiv preprint"},{"key":"ref7","article-title":"Gaussianflow: Splatting gaussian dynamics for 4d content creation","author":"Gao","year":"2024","journal-title":"arXiv preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00055"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00961"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73650-6_24"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01885"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73383-3_22"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/icpr56361.2022.9956561"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i4.32459"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01545"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00788"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2019.2953639"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72751-1_10"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00877"},{"key":"ref22","article-title":"Fb-occ: 3d occupancy prediction based on forward-backward view transformation","author":"Li","year":"2023","journal-title":"arXiv preprint"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref24","article-title":"Infusion: Inpainting 3d gaussians via learning depth completion from diffusion prior","author":"Liu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref25","article-title":"Sgdr: Stochastic gradient descent with warm restarts","author":"Loshchilov","year":"2016","journal-title":"arXiv preprint"},{"key":"ref26","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017","journal-title":"arXiv preprint"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00850"},{"key":"ref28","article-title":"Mmscan: A multi-modal 3d scene dataset with hierarchical grounded language annotations","author":"Lyu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref29","article-title":"Gsedit: Efficient text-guided editing of 3d objects via gaussian splatting","author":"Palandra","year":"2024","journal-title":"arXiv preprint"},{"key":"ref30","first-page":"652","article-title":"Pointnet: Deep learning on point sets for 3d classification and segmentation","volume":"1","author":"Qi","year":"2017","journal-title":"CVPR"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00937"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00566"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3095302"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20080-9_28"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"ref36","article-title":"Contrastive gaussian clustering: Weakly supervised 3d scene segmentation","author":"Silva","year":"2024","journal-title":"arXiv preprint"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.28"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01954"},{"key":"ref39","first-page":"6105","article-title":"Efficientnet: Rethinking model scaling for convolutional neural networks","author":"Tan","year":"2019","journal-title":"In ICML"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00772"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00273"},{"key":"ref42","first-page":"29975","article-title":"Cagroup3d: Class-aware grouping for 3d object detection on point clouds","volume":"35","author":"Wang","year":"2022","journal-title":"NeurIPS"},{"key":"ref43","article-title":"Occsora: 4d occupancy generation models as world simulators for autonomous driving","author":"Wang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01868"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01636"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01986"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/3DV50981.2020.00090"},{"key":"ref48","article-title":"Bridging 3d gaussian and mesh for freeview video rendering","author":"Xiao","year":"2024","journal-title":"arXiv preprint"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02041"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01853"},{"key":"ref51","article-title":"Depth anything v2","author":"Yang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i9.33010"},{"key":"ref53","article-title":"4d gaussian splatting: Modeling dynamic scenes with native 4 d primitives","author":"Yang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00867"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00407"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73404-5_3"},{"key":"ref57","article-title":"Gaussian-slam: Photo-realistic dense slam with gaussian splatting","author":"Yugay","year":"2023","journal-title":"arXiv preprint"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72624-8_4"},{"key":"ref59","article-title":"Pointocc: Cylindrical tri-perspective view for point-based 3d semantic occupancy prediction","author":"Zuo","year":"2023","journal-title":"arXiv preprint"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11445188.pdf?arnumber=11445188","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T06:14:14Z","timestamp":1777529654000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11445188\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":59,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.02446","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}