{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T10:18:58Z","timestamp":1777889938812,"version":"3.51.4"},"reference-count":57,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFB3209800"],"award-info":[{"award-number":["2023YFB3209800"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62301484"],"award-info":[{"award-number":["62301484"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007834","name":"Ningbo Natural Science Foundation of China","doi-asserted-by":"publisher","award":["2024J454"],"award-info":[{"award-number":["2024J454"]}],"id":[{"id":"10.13039\/100007834","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004750","name":"Aeronautical Science Foundation of China","doi-asserted-by":"publisher","award":["2024M071076001"],"award-info":[{"award-number":["2024M071076001"]}],"id":[{"id":"10.13039\/501100004750","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.00708","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"7548-7558","source":"Crossref","is-referenced-by-count":0,"title":["Language Driven Occupancy Prediction"],"prefix":"10.1109","author":[{"given":"Zhu","family":"Yu","sequence":"first","affiliation":[{"name":"Zhejiang University"}]},{"given":"Bowen","family":"Pang","sequence":"additional","affiliation":[{"name":"Zhejiang University"}]},{"given":"Lizhe","family":"Liu","sequence":"additional","affiliation":[{"name":"CaiNiao Inc., Alibaba Group,Unmanned Vehicle Dept."}]},{"given":"Runmin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University"}]},{"given":"Qiang","family":"Li","sequence":"additional","affiliation":[{"name":"CaiNiao Inc., Alibaba Group,Unmanned Vehicle Dept."}]},{"given":"Si-Yuan","family":"Cao","sequence":"additional","affiliation":[{"name":"Zhejiang University,Ningbo Global Innovation Center"}]},{"given":"Maochun","family":"Luo","sequence":"additional","affiliation":[{"name":"CaiNiao Inc., Alibaba Group,Unmanned Vehicle Dept."}]},{"given":"Mingxia","family":"Chen","sequence":"additional","affiliation":[{"name":"CaiNiao Inc., Alibaba Group,Unmanned Vehicle Dept."}]},{"given":"Sheng","family":"Yang","sequence":"additional","affiliation":[{"name":"CaiNiao Inc., Alibaba Group,Unmanned Vehicle Dept."}]},{"given":"Hui-Liang","family":"Shen","sequence":"additional","affiliation":[{"name":"Zhejiang University"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Qwen technical report","author":"Bai","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref2","article-title":"Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond","author":"Bai","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00939"},{"key":"ref4","article-title":"Langocc: Self-supervised open vocabulary occupancy estimation via volume rendering","author":"Boeder","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00396"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01599"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00394"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01712"},{"key":"ref11","article-title":"Bevdet4d: Exploit temporal cues in multi-camera 3d object detection","author":"Huang","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref12","article-title":"Bevdet: High-performance multi-camera 3d object detection in bird-eye-view","author":"Huang","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01885"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72649-1_21"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01807"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref18","article-title":"Language-driven semantic segmentation","volume-title":"International Conference on Learning Representations","author":"Li","year":"2023"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00615"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00539"},{"key":"ref21","article-title":"Voxdet: Rethinking 3d semantic occupancy prediction as dense object detection","author":"Li","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00877"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_1"},{"key":"ref24","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref25","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017","journal-title":"arXiv preprint arXiv"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.01884"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611537"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00085"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_12"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01895"},{"key":"ref31","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Radford","year":"2021"},{"key":"ref32","article-title":"Sam 2: Segment anything in images and videos","author":"Ravi","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2809"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00772"},{"key":"ref35","article-title":"Pop-3d: Open-vocabulary 3d occupancy prediction from images","author":"Vobecky","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01868"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01636"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1800"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01986"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/icra55743.2025.11128708"},{"key":"ref41","article-title":"See through the dark: Learning illumination-affined representations for nighttime occupancy prediction","author":"Wu","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102671"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00289"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00288"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.3390\/s18103337"},{"key":"ref46","first-page":"4874","article-title":"Triperspective view decomposition for geometry-aware depth completion","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Yan","year":"2024"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01710"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73464-9_23"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00802"},{"key":"ref51","article-title":"Flashocc: Fast and memory-efficient occupancy prediction via channel-to-height plugin","author":"Yu","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0049"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01416"},{"key":"ref54","article-title":"Occnerf: Selfsupervised multi-camera occupancy prediction with neural radiance fields","author":"Zhang","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00865"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72949-2_6"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611261"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11446014.pdf?arnumber=11446014","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:22:41Z","timestamp":1777612961000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11446014\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":57,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.00708","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}