{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T10:16:30Z","timestamp":1777889790567,"version":"3.51.4"},"reference-count":77,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.01001","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"10753-10763","source":"Crossref","is-referenced-by-count":2,"title":["Towards Human-Like Virtual Beings: Simulating Human Behavior in 3D Scenes"],"prefix":"10.1109","author":[{"given":"Chen","family":"Liang","sequence":"first","affiliation":[{"name":"Zhejiang University,State Key Lab of Brain-Machine Intelligence"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenguan","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University,State Key Lab of Brain-Machine Intelligence"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Yang","sequence":"additional","affiliation":[{"name":"Zhejiang University,State Key Lab of Brain-Machine Intelligence"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Diderot\u2019s early philosophical works","author":"Diderot","year":"1911","journal-title":"Number 4. Open Court"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v26i1.8447"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1002\/hbe2.117"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.00590"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s12008-015-0259-2"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545616"},{"key":"ref7","article-title":"Habitat 3.0: A co-habitat for humans, avatars and robots","author":"Puig","year":"2023","journal-title":"arXiv preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01826"},{"key":"ref9","article-title":"Towards versatile embodied navigation","author":"Wang","year":"2022","journal-title":"NeurIPS"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01971"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"key":"ref12","article-title":"Language models are few-shot learners","author":"Brown","year":"2020","journal-title":"NeurIPS"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00554"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00393"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/176789.176803"},{"key":"ref17","article-title":"Towards implementation of social interaction","author":"Zubek","year":"2002","journal-title":"AAAI"},{"key":"ref18","article-title":"Narrative in virtual environments-towards emergent narrative","author":"Aylett","year":"1999","journal-title":"AAAI"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v24i1.7567"},{"key":"ref20","article-title":"Evaluation of human-ai teams for learned and rule-based agents in hanabi","author":"Chit Siu","year":"2021","journal-title":"NeurIPS"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1201\/9780429489105"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.4018\/jgcms.2012040103"},{"key":"ref23","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019","journal-title":"arXiv preprint"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6297"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/SNPD.2017.8022767"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10744"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/375735.376343"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aiide.v3i1.18787"},{"key":"ref30","article-title":"Interleaving learning, problem solving, and execution in the icarus architecture","author":"Langley","year":"2005","journal-title":"Technical report, Stanford University, Center for the Study of Language and Information"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606763"},{"key":"ref32","article-title":"Neurosymbolic procedural planning with commonsense prompting","author":"Lu","year":"2023","journal-title":"ICLR"},{"key":"ref33","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","author":"Huang","year":"2022","journal-title":"ICML"},{"key":"ref34","article-title":"Hugginggpt: Solving ai tasks with chatgpt and its friends in huggingface","author":"Shen","year":"2023","journal-title":"NeurIPS"},{"key":"ref35","article-title":"Chameleon: Plug-and-play compositional reasoning with large language models","author":"Lu","year":"2023","journal-title":"NeurIPS"},{"key":"ref36","article-title":"Doraemongpt: Toward understanding dynamic scenes with large language models (exemplified as a video agent)","author":"Yang","year":"2024","journal-title":"ICML"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-industry.4"},{"key":"ref38","article-title":"Llemma: An open language model for mathematics","author":"Azerbayev","year":"2023","journal-title":"arXiv preprint"},{"key":"ref39","article-title":"A systematic investigation of commonsense knowledge in large language models","author":"Lorraine Li","year":"2022","journal-title":"EMNLP"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.025"},{"key":"ref42","article-title":"Palm: Scaling language modeling with pathways","author":"Chowdhery","year":"2022","journal-title":"arXiv preprint"},{"key":"ref43","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","author":"Brohan","year":"2023","journal-title":"CoRL"},{"key":"ref44","article-title":"Grounded decoding: Guiding text generation with grounded models for robot control","author":"Huang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.143"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.437"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.01642"},{"key":"ref48","article-title":"Language models are realistic tabular data generators","author":"Borisov","year":"2023","journal-title":"ICLR"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.5"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.555"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00237"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380107"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.341"},{"key":"ref55","article-title":"Humanise: Language-conditioned human motion generation in 3d scenes","author":"Wang","year":"2022","journal-title":"NeurIPS"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01123"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00667"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref59","article-title":"Habitat 2.0: Training home assistants to rearrange their habitat","author":"Szot","year":"2021","journal-title":"NeurIPS"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00205"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_34"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01981"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/BF01096763"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-3207"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-75538-8_7"},{"key":"ref66","article-title":"Bertscore: Evaluating text generation with bert","author":"Zhang","year":"2019","journal-title":"ICLR"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00078"},{"key":"ref68","article-title":"Habitat-matterport 3d dataset (hm3d): 1000 large-scale 3d environments for embodied ai","author":"Ramakrishnan","year":"2021","journal-title":"NeurIPS Datasets and Benchmarks Track"},{"key":"ref69","article-title":"The replica dataset: A digital replica of indoor spaces","author":"Straub","year":"2019","journal-title":"arXiv preprint"},{"key":"ref70","article-title":"Singleshot motion completion with transformer","author":"Duan","year":"2021","journal-title":"arXiv preprint"},{"key":"ref71","article-title":"Human motion diffusion as a generative prior","author":"Shafir","year":"2024","journal-title":"ICLR"},{"key":"ref72","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"Papineni","year":"2002","journal-title":"ACL"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591525"},{"key":"ref74","volume-title":"Vicuna: An opensource chatbot impressing gpt-4 with 90%* chatgpt quality","author":"Chiang","year":"2023"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00928"},{"key":"ref76","article-title":"Human motion diffusion model","author":"Tevet","year":"2023","journal-title":"ICLR"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00509"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11446288.pdf?arnumber=11446288","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:21:26Z","timestamp":1777612886000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11446288\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":77,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.01001","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}