{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:21:53Z","timestamp":1764400913952,"version":"3.46.0"},"reference-count":16,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249082","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"1464-1469","source":"Crossref","is-referenced-by-count":0,"title":["VoxRep: Enhancing 3D Spatial Understanding in 2D Vision-Language Models via Voxel Representation"],"prefix":"10.1109","author":[{"given":"Alan","family":"Dao Gia Tuan Dao","sequence":"first","affiliation":[{"name":"Menlo Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Norapat","family":"Buppodom","sequence":"additional","affiliation":[{"name":"Menlo Research"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"volume-title":"Voxelnet: End-to-end learning for point cloud based 3d object detection","year":"2017","author":"Zhou","key":"ref1"},{"volume-title":"Leveraging large (visual) language models for robot 3d scene understanding","year":"2023","author":"Chen","key":"ref2"},{"volume-title":"Wikipedia contributors, Voxel - Wikipedia, The Free Encyclopedia","year":"2025","key":"ref3"},{"volume-title":"Qwen2.5-vl technical report","year":"2025","author":"Bai","key":"ref4"},{"volume-title":"Minicpm-v: A gpt-4v level mllm on your phone","year":"2024","author":"Yao","key":"ref5"},{"volume-title":"Lscenellm: Enhancing large 3d scene understanding using adaptive visual preferences","year":"2025","author":"Zhi","key":"ref6"},{"volume-title":"Scenegpt: A language model for 3d scene understanding","year":"2024","author":"Chandhok","key":"ref7"},{"volume-title":"Scene-llm: Extending language model for 3d visual understanding and reasoning","year":"2024","author":"Fu","key":"ref8"},{"volume-title":"3d representation methods: A survey","year":"2024","author":"Wang","key":"ref9"},{"volume-title":"Hybrid voxel formats for efficient ray tracing","year":"2024","author":"Arbore","key":"ref10"},{"volume-title":"Differentiable voxelization and mesh morphing","year":"2024","author":"Luo","key":"ref11"},{"volume-title":"Foundational models for3 d point clouds: A survey and outlook","year":"2025","author":"Thengane","key":"ref12"},{"volume-title":"Mmspatial: Exploring 3d spatial understanding in multimodal llms","year":"2025","author":"Daxberger","key":"ref13"},{"volume-title":"Spatialvlm: Endowing vision-language models with spatial reasoning capabilities","year":"2024","author":"Chen","key":"ref14"},{"article-title":"Gemma 3: Technical report","volume-title":"Google DeepMind, Tech. Rep.","year":"2024","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298801"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249082.pdf?arnumber=11249082","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:19:43Z","timestamp":1764400783000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249082\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":16,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249082","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}