{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:22:54Z","timestamp":1776111774588,"version":"3.50.1"},"reference-count":55,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,16]],"date-time":"2024-06-16T00:00:00Z","timestamp":1718496000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,16]],"date-time":"2024-06-16T00:00:00Z","timestamp":1718496000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,16]]},"DOI":"10.1109\/cvpr52733.2024.01536","type":"proceedings-article","created":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T17:34:53Z","timestamp":1726508093000},"page":"16277-16287","source":"Crossref","is-referenced-by-count":63,"title":["Holodeck: Language Guided Generation of 3D Embodied AI Environments"],"prefix":"10.1109","author":[{"given":"Yue","family":"Yang","sequence":"first","affiliation":[{"name":"University of Pennsylvania"}]},{"given":"Fan-Yun","family":"Sun","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"Luca","family":"Weihs","sequence":"additional","affiliation":[{"name":"Allen Institute for Artificial Intelligence"}]},{"given":"Eli","family":"Vanderbilt","sequence":"additional","affiliation":[{"name":"Allen Institute for Artificial Intelligence"}]},{"given":"Alvaro","family":"Herrasti","sequence":"additional","affiliation":[{"name":"Allen Institute for Artificial Intelligence"}]},{"given":"Winson","family":"Han","sequence":"additional","affiliation":[{"name":"Allen Institute for Artificial Intelligence"}]},{"given":"Jiajun","family":"Wu","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"Nick","family":"Haber","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"Ranjay","family":"Krishna","sequence":"additional","affiliation":[{"name":"University of Washington"}]},{"given":"Lingjie","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Pennsylvania"}]},{"given":"Chris","family":"Callison-Burch","sequence":"additional","affiliation":[{"name":"University of Pennsylvania"}]},{"given":"Mark","family":"Yatskar","sequence":"additional","affiliation":[{"name":"University of Pennsylvania"}]},{"given":"Aniruddha","family":"Kembhavi","sequence":"additional","affiliation":[{"name":"University of Washington"}]},{"given":"Christopher","family":"Clark","sequence":"additional","affiliation":[{"name":"Allen Institute for Artificial Intelligence"}]}],"member":"263","reference":[{"key":"ref1","author":"Batra","year":"2020","journal-title":"ObjectNav Revisited: On Evaluation of Embodied Agents Navigating to Objects."},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3115\/vl\/W14-3102"},{"key":"ref3","author":"Chang","year":"2017","journal-title":"Sceneseer:3d scene design with natural language."},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794327"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00323"},{"key":"ref6","article-title":"Proc-THOR: Large-Scale Embodied AI Using Procedural Generation","author":"Deitke","year":"2022","journal-title":"NeurIPS"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00932"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"ref9","author":"Feng","year":"2023","journal-title":"Layoutgpt: Compositional visual plan-ning and generation with large language models."},{"key":"ref10","author":"Fridman","year":"2023","journal-title":"Scenescape: Text-driven consistent scene generation."},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01075"},{"key":"ref12","author":"Gan","year":"2020","journal-title":"Threedworld: A platform for interactive multi-modal physical simulation."},{"key":"ref13","first-page":"11808","article-title":"Nerfdiff: Single-image view synthesis with nerf-guided distillation from 3d-aware diffusion","volume-title":"International Conference on Machine Learning","author":"Gu"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01008"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00727"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392391"},{"key":"ref18","author":"Huang","year":"2023","journal-title":"Aladdin: Zero-shot hallucination of stylized 3d assets from abstract scene descriptions."},{"key":"ref19","author":"Ilharco","year":"2021","journal-title":"Openclip"},{"key":"ref20","first-page":"6670","article-title":"Sim2real predictivity: Does evalu-ation in simulation predict real-world performance?","volume-title":"IEEE Robotics and Automation Letters","volume":"5","author":"Kadian","year":"2019"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01441"},{"key":"ref22","author":"Khanna","year":"2023","journal-title":"Chang, and Manolis Savva. Habi-tat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation."},{"key":"ref23","author":"Kolve","year":"2017","journal-title":"AI2-THOR: An Interactive 3D Environment for Visual AI."},{"key":"ref24","first-page":"80","article-title":"Behavior-1k: A benchmark for embodied ai with 1,000 everyday activities and realistic simulation","volume-title":"Conference on Robot Learning","author":"Li","year":"2023"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"ref26","author":"Lin","year":"2023","journal-title":"Towards language-guided interactive 3d generation: Llms as layout interpreter with generative feedback."},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3272127.3275035"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00768"},{"key":"ref31","year":"2023","journal-title":"GPT-4V(ision) System Card"},{"key":"ref32","first-page":"2303","author":"OpenAI","year":"2023","journal-title":"Gpt-4 technical report."},{"key":"ref33","article-title":"Atiss: Autoregres-sive transformers for indoor scene synthesis","author":"Paschalidou","year":"2021","journal-title":"Advances in Neural Information Processing Systems, 34:12013\u20131 2026"},{"key":"ref34","author":"Poole","year":"2022","journal-title":"Dreamfusion: Text-to-3d using 2d diffusion."},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206537"},{"key":"ref37","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford","year":"2021"},{"key":"ref38","article-title":"Habitat-matterport 3d dataset (HM3d):1000 large-scale 3d environments for embodied AI","author":"Ramakrishnan","year":"2021","journal-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref41","article-title":"LAION -5b: An open large-scale dataset for training next generation image-text models","author":"Schuhmann","year":"2022","journal-title":"Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00529"},{"key":"ref43","first-page":"251","article-title":"Habitat 2.0: Training home assistants to rearrange their habitat","volume":"34","author":"Szot","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00687"},{"key":"ref45","author":"Tang","year":"2023","journal-title":"Diffuscene: Scene graph denoising diffusion probabilistic model for generative indoor scene synthesis."},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00021"},{"key":"ref47","author":"Wang","year":"2023","journal-title":"Robogen: Towards unleashing infinite data for automated robot learning via generative simulation."},{"key":"ref48","author":"Wei","year":"2022","journal-title":"Emergent abilities of large language models."},{"key":"ref49","first-page":"19037","article-title":"Lego-net: Learning regular rearrangements of ob-jects in rooms","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Wei","year":"2023"},{"key":"ref50","article-title":"Learning a probabilistic latent space of ob-ject shapes via 3d generative-adversarial modeling","volume":"29","author":"Wu","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref51","first-page":"9068","article-title":"Gibson env: Real-world per-ception for embodied agents","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Xia","year":"2018"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00464"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/tvcg.2024.3361502"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/3dv62453.2024.00132"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00577"}],"event":{"name":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","location":"Seattle, WA, USA","start":{"date-parts":[[2024,6,16]]},"end":{"date-parts":[[2024,6,22]]}},"container-title":["2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10654794\/10654797\/10657821.pdf?arnumber=10657821","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T06:28:44Z","timestamp":1726640924000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10657821\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,16]]},"references-count":55,"URL":"https:\/\/doi.org\/10.1109\/cvpr52733.2024.01536","relation":{},"subject":[],"published":{"date-parts":[[2024,6,16]]}}}