{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T03:40:49Z","timestamp":1782358849127,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3680528.3687607","type":"proceedings-article","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T08:14:37Z","timestamp":1733213677000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["LLM-enhanced Scene Graph Learning for Household Rearrangement"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4487-6148","authenticated-orcid":false,"given":"Wenhao","family":"Li","sequence":"first","affiliation":[{"name":"National University of Defense Technology (NUDT), Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6201-6667","authenticated-orcid":false,"given":"Zhiyuan","family":"Yu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology (NUDT), Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0422-969X","authenticated-orcid":false,"given":"Qijin","family":"She","sequence":"additional","affiliation":[{"name":"National University of Defense Technology (NUDT), Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6269-4639","authenticated-orcid":false,"given":"Zhinan","family":"Yu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology (NUDT), Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0546-355X","authenticated-orcid":false,"given":"Yuqing","family":"Lan","sequence":"additional","affiliation":[{"name":"National University of Defense Technology (NUDT), Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2838-8601","authenticated-orcid":false,"given":"Chenyang","family":"Zhu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology (NUDT), Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6798-0336","authenticated-orcid":false,"given":"Ruizhen","family":"Hu","sequence":"additional","affiliation":[{"name":"Shenzhen University (SZU), Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9054-0216","authenticated-orcid":false,"given":"Kai","family":"Xu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology (NUDT), Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,12,3]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_25"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01854"},{"key":"e_1_3_3_2_5_1","unstructured":"Dhruv Batra Angel\u00a0X Chang Sonia Chernova Andrew\u00a0J Davison Jia Deng Vladlen Koltun Sergey Levine Jitendra Malik Igor Mordatch Roozbeh Mottaghi et\u00a0al. 2020. Rearrangement: A challenge for embodied ai. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2011.01975 (2020)."},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00182"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01647"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00228"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"crossref","unstructured":"Matthew Fisher Manolis Savva Yangyan Li Pat Hanrahan and Matthias Nie\u00dfner. 2015. Activity-centric scene synthesis for functional 3D scene modeling. ACM Transactions on Graphics (TOG) 34 6 (2015) 1\u201313.","DOI":"10.1145\/2816795.2818057"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"publisher","unstructured":"Yunfan Gao Yun Xiong Xinyu Gao Kangxiang Jia Jinliu Pan Yuxi Bi Yi Dai Jiawei Sun Qianyu Guo Meng Wang and Haofen Wang. 2023b. Retrieval-Augmented Generation for Large Language Models: A Survey. CoRR abs\/2312.10997 (2023). 10.48550\/ARXIV.2312.10997 arXiv:https:\/\/arXiv.org\/abs\/2312.10997","DOI":"10.48550\/ARXIV.2312.10997"},{"key":"e_1_3_3_2_11_1","unstructured":"Yunfan Gao Yun Xiong Xinyu Gao Kangxiang Jia Jinliu Pan Yuxi Bi Yi Dai Jiawei Sun and Haofen Wang. 2023a. Retrieval-augmented generation for large language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.10997 (2023)."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"crossref","unstructured":"Georgios Georgakis Arsalan Mousavian Alexander\u00a0C Berg and Jana Kosecka. 2017. Synthesizing training data for object detection in indoor scenes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1702.07836 (2017).","DOI":"10.15607\/RSS.2017.XIII.043"},{"key":"e_1_3_3_2_13_1","unstructured":"James\u00a0J Gibson. 1977. The theory of affordances. Hilldale USA 1 2 (1977) 67\u201382."},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995327"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995448"},{"key":"e_1_3_3_2_16_1","unstructured":"Dongge Han Trevor McInroe Adam Jelley Stefano\u00a0V Albrecht Peter Bell and Amos Storkey. 2024. LLM-Personalize: Aligning LLM Planners with Human Preferences via Reinforced Self-Training for Housekeeping Robots. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.14285 (2024)."},{"key":"e_1_3_3_2_17_1","unstructured":"Yining Hong Haoyu Zhen Peihao Chen Shuhong Zheng Yilun Du Zhenfang Chen and Chuang Gan. 2023. 3d-llm: Injecting the 3d world into large language models. Advances in Neural Information Processing Systems 36 (2023) 20482\u201320494."},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.13385"},{"key":"e_1_3_3_2_19_1","unstructured":"Dehao Huang Chao Tang and Hong Zhang. 2023. Efficient Object Rearrangement via Multi-view Fusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.08994 (2023)."},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"crossref","unstructured":"Krishna Murthy Jatavallabhula Alihusein Kuwajerwala Qiao Gu Mohd Omama Tao Chen Shuang Li Ganesh Iyer Soroush Saryazdi Nikhil Keetha Ayush Tewari Joshua B. Tenenbaum Celso Miguel de Melo Madhava Krishna Liam Paull Florian Shkurti and Antonio Torralba. 2023. ConceptFusion: Open-set Multimodal 3D Mapping. Robotics: Science and Systems (RSS) (2023).","DOI":"10.15607\/RSS.2023.XIX.066"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_21"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"crossref","unstructured":"Mukul Khanna Yongsen Mao Hanxiao Jiang Sanjay Haresh Brennan Shacklett Dhruv Batra Alexander Clegg Eric Undersander Angel\u00a0X. Chang and Manolis Savva. 2023. Habitat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation. arXiv preprint (2023). arxiv:https:\/\/arXiv.org\/abs\/2306.11290\u00a0[cs.CV]","DOI":"10.1109\/CVPR52733.2024.01550"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10578-9_54"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01639"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"crossref","unstructured":"Changyang Li Haikun Huang Jyh-Ming Lien and Lap-Fai Yu. 2021. Synthesizing scene-aware virtual reality teleport graphs. ACM Transactions on Graphics (TOG) 40 6 (2021) 1\u201315.","DOI":"10.1145\/3478513.3480478"},{"key":"e_1_3_3_2_26_1","unstructured":"Gen Li Deqing Sun Laura Sevilla-Lara and Varun Jampani. 2023. One-Shot Open Affordance Learning with Foundation Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.17776 (2023)."},{"key":"e_1_3_3_2_27_1","volume-title":"International Conference on Learning Representations","author":"LI QI","year":"2022","unstructured":"QI LI, Kaichun Mo, Yanchao Yang, Hang Zhao, and Leonidas Guibas. 2022. IFR-Explore: Learning Inter-object Functional Relationships in 3D Indoor Scenes. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=OT3mLgR8Wg8"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01265"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.031"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811931"},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"crossref","unstructured":"Ziyuan Liu Wei Liu Yuzhe Qin Fanbo Xiang Minghao Gou Songyan Xin Maximo\u00a0A Roa Berk Calli Hao Su Yu Sun et\u00a0al. 2021. Ocrtoc: A cloud-based competition and benchmark for robotic grasping and manipulation. IEEE Robotics and Automation Letters 7 1 (2021) 486\u2013493.","DOI":"10.1109\/LRA.2021.3129136"},{"key":"e_1_3_3_2_32_1","first-page":"1666","volume-title":"Conference on robot learning","author":"Mo Kaichun","year":"2022","unstructured":"Kaichun Mo, Yuzhe Qin, Fanbo Xiang, Hao Su, and Leonidas Guibas. 2022. O2O-Afford: Annotation-free large-scale object-object affordance learning. In Conference on robot learning. PMLR, 1666\u20131677."},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00878"},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00024"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341553"},{"key":"e_1_3_3_2_36_1","unstructured":"Zhe Ni Xiao-Xin Deng Cong Tai Xin-Yue Zhu Xiang Wu Yong-Jin Liu and Long Zeng. 2023. Grid: Scene-graph-based instruction-driven robotic task planning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.07726 (2023)."},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/IWSSIP48289.2020.9145130"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14927"},{"key":"e_1_3_3_2_39_1","unstructured":"Xavier Puig Eric Undersander Andrew Szot Mikael\u00a0Dallaire Cote Tsung-Yen Yang Ruslan Partsey Ruta Desai Alexander\u00a0William Clegg Michal Hlavac So\u00a0Yeon Min et\u00a0al. 2023. Habitat 3.0: A co-habitat for humans avatars and robots. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.13724 (2023)."},{"key":"e_1_3_3_2_40_1","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_2_41_1","unstructured":"Abhinav Rajvanshi Karan Sikka Xiao Lin Bhoram Lee Han-Pang Chiu and Alvaro Velasquez. 2023. Saynav: Grounding large language models for dynamic planning to navigation in new environments. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.04077 (2023)."},{"key":"e_1_3_3_2_42_1","volume-title":"7th Annual Conference on Robot Learning","author":"Rana Krishan","year":"2023","unstructured":"Krishan Rana, Jesse Haviland, Sourav Garg, Jad Abou-Chakra, Ian Reid, and Niko Suenderhauf. 2023. Sayplan: Grounding large language models using 3d scene graphs for scalable robot task planning. In 7th Annual Conference on Robot Learning."},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_28"},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"crossref","unstructured":"Manolis Savva Angel\u00a0X Chang Pat Hanrahan Matthew Fisher and Matthias Nie\u00dfner. 2014. SceneGrok: Inferring action maps in 3D environments. ACM transactions on graphics (TOG) 33 6 (2014) 1\u201310.","DOI":"10.1145\/2661229.2661230"},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"crossref","unstructured":"Manolis Savva Angel\u00a0X Chang Pat Hanrahan Matthew Fisher and Matthias Nie\u00dfner. 2016. Pigraphs: learning interaction snapshots from observations. ACM Transactions On Graphics (TOG) 35 4 (2016) 1\u201312.","DOI":"10.1145\/2897824.2925867"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"e_1_3_3_2_47_1","unstructured":"Chao Tang Jingwen Yu Weinan Chen and Hong Zhang. 2021. Relationship oriented affordance learning through manipulation graph construction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.14137 (2021)."},{"key":"e_1_3_3_2_48_1","unstructured":"Tuan Van\u00a0Vo Minh\u00a0Nhat Vu Baoru Huang Toan Nguyen Ngan Le Thieu Vo and Anh Nguyen. 2023. Open-vocabulary affordance detection using knowledge distillation and text-point correlation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.10932 (2023)."},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"crossref","unstructured":"Zan Wang Yixin Chen Baoxiong Jia Puhao Li Jinlu Zhang Jingze Zhang Tengyu Liu Yixin Zhu Wei Liang and Siyuan Huang. 2024. Move as You Say Interact as You Can: Language-guided Human Motion Generation with Scene Affordance. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.18036 (2024).","DOI":"10.1109\/CVPR52733.2024.00049"},{"key":"e_1_3_3_2_50_1","unstructured":"Zehan Wang Haifeng Huang Yang Zhao Ziang Zhang and Zhou Zhao. 2023. Chat-3d: Data-efficiently tuning large language model for universal dialogue of 3d scenes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.08769 (2023)."},{"key":"e_1_3_3_2_51_1","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837."},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00586"},{"key":"e_1_3_3_2_53_1","unstructured":"Jimmy Wu Rika Antonova Adam Kan Marion Lepert Andy Zeng Shuran Song Jeannette Bohg Szymon Rusinkiewicz and Thomas Funkhouser. 2023. TidyBot: Personalized Robot Assistance with Large Language Models. Autonomous Robots (2023)."},{"key":"e_1_3_3_2_54_1","unstructured":"Sriram Yenamandra Arun Ramachandran Karmesh Yadav Austin Wang Mukul Khanna Theophile Gervet Tsung-Yen Yang Vidhi Jain Alex\u00a0William Clegg John Turner Zsolt Kira Manolis Savva Angel Chang Devendra\u00a0Singh Chaplot Dhruv Batra Roozbeh Mottaghi Yonatan Bisk and Chris Paxton. 2023. HomeRobot: Open Vocab Mobile Manipulation. https:\/\/aihabitat.org\/static\/challenge\/home_robot_ovmm_2023\/OVMM.pdf"},{"key":"e_1_3_3_2_55_1","unstructured":"Ceng Zhang Xin Meng Dongchen Qi and Gregory\u00a0S Chirikjian. 2024. RAIL: Robot Affordance Imagination with Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.19369 (2024)."},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00645"},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00459"},{"key":"e_1_3_3_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.415"}],"event":{"name":"SA '24: SIGGRAPH Asia 2024 Conference Papers","location":"Tokyo Japan","acronym":"SA '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["SIGGRAPH Asia 2024 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687607","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3680528.3687607","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:26Z","timestamp":1750294706000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687607"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":57,"alternative-id":["10.1145\/3680528.3687607","10.1145\/3680528"],"URL":"https:\/\/doi.org\/10.1145\/3680528.3687607","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}