{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T14:54:52Z","timestamp":1782312892239,"version":"3.54.5"},"reference-count":86,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"GRF grant from the Research Grants Council"},{"name":"Hong Kong Special Administrative Region","award":["City U 11208123"],"award-info":[{"award-number":["City U 11208123"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Visual. Comput. Graphics"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/tvcg.2025.3626731","type":"journal-article","created":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T17:11:51Z","timestamp":1761930711000},"page":"2243-2259","source":"Crossref","is-referenced-by-count":7,"title":["Chat2Layout: Interactive 3D Furniture Layout With a Multimodal LLM"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5102-1464","authenticated-orcid":false,"given":"Can","family":"Wang","sequence":"first","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0840-8812","authenticated-orcid":false,"given":"Hongliang","family":"Zhong","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3447-0866","authenticated-orcid":false,"given":"Menglei","family":"Chai","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9982-7934","authenticated-orcid":false,"given":"Mingming","family":"He","sequence":"additional","affiliation":[{"name":"Netflix, Los Gatos, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4642-4373","authenticated-orcid":false,"given":"Dongdong","family":"Chen","sequence":"additional","affiliation":[{"name":"Microsoft Cloud AI, New York, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7014-5377","authenticated-orcid":false,"given":"Jing","family":"Liao","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-1006"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1217"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2185520.2185552"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2366145.2366154"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00381"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3322941"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3272127.3275035"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130805"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3381866"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201362"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00634"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00748"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3303766"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01604"},{"key":"ref15","first-page":"12013","article-title":"ATISS: Autoregressive transformers for indoor scene synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Paschalidou","year":"2021"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00021"},{"key":"ref17","first-page":"30026","article-title":"CommonScenes: Generating commonsense 3D indoor scenes with scene graphs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhai","year":"2024"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.01938"},{"key":"ref19","article-title":"InstructScene: Instruction-driven 3D indoor scene synthesis with semantic graph prior","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Lin","year":"2023"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01534-z"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01075"},{"key":"ref23","first-page":"52","article-title":"Anyhome: Open-vocabulary generation of structured and textured 3D homes","volume-title":"Proc. Eur. Conf. Comput. Vis.","author":"Wen","year":"2024"},{"key":"ref24","first-page":"18225","article-title":"LayoutGPT: Compositional visual planning and generation with large language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Feng","year":"2024"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01536"},{"key":"ref26","article-title":"Dreamfusion: Text-to-3D using 2D diffusion","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Poole","year":"2022"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3748302"},{"key":"ref28","article-title":"GPT-4 vision","year":"2023"},{"key":"ref29","article-title":"MiniGPT-4: Enhancing vision-language understanding with advanced large language models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Zhu","year":"2023"},{"key":"ref30","first-page":"28541","article-title":"LLaVA-med: Training a large language-and-vision assistant for biomedicine in one day","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Li","year":"2024"},{"key":"ref31","article-title":"MobileVLM V2: Faster and stronger baseline for vision language model","author":"Chu","year":"2024"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.521"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2025.3637265"},{"key":"ref34","first-page":"42566","article-title":"InternLM-XComposer2-4KHD: A pioneering large vision-language model handling resolutions from 336 pixels to 4K HD","volume":"37","author":"Dong","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4231-5"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00220"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-46002-9_23"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29936"},{"key":"ref39","article-title":"AutoGen: Enabling next-gen LLM applications via multi-agent conversation","author":"Wu","year":"2024","journal-title":"Workshop Large Lang. Model"},{"key":"ref40","article-title":"Chateval: Towards better LLM-based evaluators through multi-agent debate","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Chan","year":"2024"},{"key":"ref41","first-page":"28091","article-title":"MIND2WEB: Towards a generalist agent for the Web","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Deng","year":"2023"},{"key":"ref42","first-page":"28440","article-title":"Housecrafter: Lifting floorplans to 3D scenes with 2D diffusion model","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Chen","year":"2025"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01825"},{"key":"ref44","article-title":"Open-universe indoor scene generation using LLM program synthesis and uncurated object databases","author":"Aguina-Kang","year":"2024"},{"key":"ref45","first-page":"19252","article-title":"Scenecraft: An LLM agent for synthesizing 3D scenes as blender code","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Hu","year":"2024"},{"key":"ref46","article-title":"AutoGPT","author":"Gravitas","year":"2023"},{"key":"ref47","article-title":"GPT-engineer","author":"Osika","year":"2024"},{"key":"ref48","article-title":"Babyagi","author":"Nakajima","year":"2024"},{"key":"ref49","article-title":"Self-consistency improves chain of thought reasoning in language models","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Wang","year":"2022"},{"key":"ref50","article-title":"Automatic chain of thought prompting in large language models","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Zhang","year":"2022"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.346"},{"key":"ref52","article-title":"Large language models are human-level prompt engineers","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Zhou","year":"2022"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01101"},{"key":"ref55","first-page":"24993","article-title":"Fine-grained visual prompting","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yang","year":"2024"},{"key":"ref56","article-title":"The dawn of LMMs: Preliminary explorations with GPT-4V (ision)","author":"Yang","year":"2023"},{"key":"ref57","article-title":"Set-of-mark prompting unleashes extraordinary visual grounding in GPT-4V","author":"Yang","year":"2023"},{"key":"ref58","article-title":"Selective annotation makes language models better few-shot learners","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Su","year":"2022"},{"key":"ref59","first-page":"23716","article-title":"Flamingo: A visual language model for few-shot learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Alayrac","year":"2022"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20215"},{"key":"ref61","article-title":"Meshy","year":"2023"},{"key":"ref62","article-title":"Luma AI","year":"2023"},{"key":"ref63","article-title":"Audo AI","year":"2023"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00258"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555392"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2023.3283400"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00223"},{"key":"ref69","first-page":"8406","article-title":"ProlificDreamer: High-fidelity and diverse text-to-3D generation with variational score distillation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wang","year":"2024"},{"key":"ref70","article-title":"3DTopia: Large text-to-3D generation model with hybrid diffusion priors","author":"Hong","year":"2024"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73235-5_1"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00649"},{"key":"ref73","article-title":"DreamGaussian: Generative Gaussian splatting for efficient 3D content creation","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Tang","year":"2024"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00381"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"ref77","article-title":"Instant3D: Fast text-to-3D with sparse-view generation and large reconstruction model","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Li","year":"2023"},{"key":"ref78","article-title":"Tripo3D","year":"2024"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4222-0"},{"key":"ref80","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"key":"ref81","first-page":"1","article-title":"DINOv2: Learning robust visual features without supervision","volume":"1","author":"Oquab","year":"2024","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/1653771.1653865"},{"key":"ref83","first-page":"2023","article-title":"ReAct: Synergizing reasoning and acting in language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yao"},{"key":"ref84","first-page":"8634","article-title":"Reflexion: Language agents with verbal reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Shinn","year":"2024"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/56.2083"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1177\/154193120605000909"}],"container-title":["IEEE Transactions on Visualization and Computer Graphics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/2945\/11373710\/11222970.pdf?arnumber=11222970","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T00:33:21Z","timestamp":1770683601000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11222970\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":86,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tvcg.2025.3626731","relation":{},"ISSN":["1077-2626","1941-0506","2160-9306"],"issn-type":[{"value":"1077-2626","type":"print"},{"value":"1941-0506","type":"electronic"},{"value":"2160-9306","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}