{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T14:30:46Z","timestamp":1776090646296,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":128,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the Research Grants Council of the Hong Kong Special Administrative Region","award":["No. 16207923"],"award-info":[{"award-number":["No. 16207923"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706598.3713791","type":"proceedings-article","created":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T04:35:25Z","timestamp":1745469325000},"page":"1-20","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["ACKnowledge: A Computational Framework for Human Compatible Affordance-based Interaction Planning in Real-world Contexts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5562-8685","authenticated-orcid":false,"given":"Ziqi","family":"Pan","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4206-630X","authenticated-orcid":false,"given":"Xiucheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Sun Yat-sen University, Zhuhai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8825-0191","authenticated-orcid":false,"given":"Zisu","family":"Li","sequence":"additional","affiliation":[{"name":"IIP (Computational Media and Arts), The Hong Kong University of Science and Technology, Hong Kong SAR, Hong Kong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5700-3136","authenticated-orcid":false,"given":"Zhenhui","family":"Peng","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Sun Yat-sen University, Zhuhai, Guangdong Province, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0356-4712","authenticated-orcid":false,"given":"Mingming","family":"Fan","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9847-7784","authenticated-orcid":false,"given":"Xiaojuan","family":"Ma","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"1999. Case studies of applying Gibson\u2019s ecological approach to mobile robots. IEEE Transactions on Systems Man and Cybernetics-Part A: Systems and Humans 29 1 (1999) 105\u2013111.","DOI":"10.1109\/3468.736365"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Henk Aarts and Ap Dijksterhuis. 2003. The silence of the library: environment situational norm and social behavior. Journal of personality and social psychology 84 1 (2003) 18.","DOI":"10.1037\/\/0022-3514.84.1.18"},{"key":"e_1_3_3_2_4_2","unstructured":"Michael Ahn Anthony Brohan Noah Brown Yevgen Chebotar Omar Cortes Byron David Chelsea Finn Chuyuan Fu Keerthana Gopalakrishnan Karol Hausman Alex Herzog Daniel Ho Jasmine Hsu Julian Ibarz Brian Ichter Alex Irpan Eric Jang Rosario\u00a0Jauregui Ruano Kyle Jeffrey Sally Jesmonth Nikhil\u00a0J Joshi Ryan Julian Dmitry Kalashnikov Yuheng Kuang Kuang-Huei Lee Sergey Levine Yao Lu Linda Luu Carolina Parada Peter Pastor Jornell Quiambao Kanishka Rao Jarek Rettinghouse Diego Reyes Pierre Sermanet Nicolas Sievers Clayton Tan Alexander Toshev Vincent Vanhoucke Fei Xia Ted Xiao Peng Xu Sichun Xu Mengyuan Yan and Andy Zeng. 2022. Do As I Can Not As I Say: Grounding Language in Robotic Affordances. arxiv:https:\/\/arXiv.org\/abs\/2204.01691\u00a0[cs.RO]"},{"key":"e_1_3_3_2_5_2","first-page":"39","volume-title":"AAAI spring symposium: to boldly go where no human-robot team has gone before","author":"Alami Rachid","year":"2006","unstructured":"Rachid Alami, Aur\u00e9lie Clodic, Vincent Montreuil, Emrah\u00a0Akin Sisbot, and Raja Chatila. 2006. Toward Human-Aware Robot Task Planning.. In AAAI spring symposium: to boldly go where no human-robot team has gone before. 39\u201346."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3383652.3423900"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Ettore Ambrosini Claudia Scorolli Anna\u00a0M Borghi and Marcello Costantini. 2012. Which body for embodied cognition? Affordance and language within actual and perceived reaching space. Consciousness and cognition 21 3 (2012) 1551\u20131557.","DOI":"10.1016\/j.concog.2012.06.010"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Kamiar Aminian and Bijan Najafi. 2004. Capturing human motion using body-fixed sensors: outdoor measurement and clinical applications. Computer animation and virtual worlds 15 2 (2004) 79\u201394.","DOI":"10.1002\/cav.2"},{"key":"e_1_3_3_2_9_2","unstructured":"R Arora S Singh K Swaminathan A Datta S Banerjee B Bhowmick KM Jatavallabhula and M Sridharan. [n. d.]. Anticipate & Act: Integrating LLMs and Classical Planning for Efficient Task Execution in Household Environments. In the IEEE. ([n. d.])."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Susy\u00a0Budi Astuti Purwanita Setijanti and Ispurwono Soemarno. 2017. Personalization of space in private and public setting within vertical housing as sustainable living. DIMENSI (Journal of Architecture and Built Environment) 44 1 (2017) 37\u201344.","DOI":"10.9744\/dimensi.44.1.37-44"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01324"},{"key":"e_1_3_3_2_12_2","unstructured":"Martin Benfeghoul Umais Zahid Qinghai Guo and Zafeirios Fountas. 2024. When in Doubt Think Slow: Iterative Reasoning with Latent Imagination. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.15283 (2024)."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Miodrag Bolic Majed Rostamian and Petar\u00a0M Djuric. 2015. Proximity detection with RFID: A step toward the internet of things. IEEE Pervasive Computing 14 2 (2015) 70\u201376.","DOI":"10.1109\/MPRV.2015.39"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Anna\u00a0M Borghi. 2021. Affordances context and sociality. Synthese 199 5 (2021) 12485\u201312515.","DOI":"10.1007\/s11229-018-02044-1"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Virginia Braun and Victoria Clarke. 2006. Using thematic analysis in psychology. Qualitative research in psychology 3 2 (2006) 77\u2013101.","DOI":"10.1191\/1478088706qp063oa"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Sandra Brunia and Anca Hartjes-Gosselink. 2009. Personalization in non-territorial offices: a study of a human need. Journal of Corporate Real Estate 11 3 (2009) 169\u2013182.","DOI":"10.1108\/14630010910985922"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Wei Cao Yan Luo Yiming Dai Xin Wang Kaili Wu Huijuan Lin Kun Rui and Jixin Zhu. 2023. Piezoresistive pressure sensor based on a conductive 3D sponge network for motion sensing and human\u2013machine interface. ACS Applied Materials & Interfaces 15 2 (2023) 3131\u20133140.","DOI":"10.1021\/acsami.2c18203"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Filippo Cavallo Raffaele Limosani Alessandro Manzi Manuele Bonaccorsi Raffaele Esposito Maurizio Di\u00a0Rocco Federico Pecora Giancarlo Teti Alessandro Saffiotti and Paolo Dario. 2014. Development of a socially believable multi-robot solution from town to home. Cognitive Computation 6 (2014) 954\u2013967.","DOI":"10.1007\/s12559-014-9290-z"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Janis Chadsey and Steve Beyer. 2001. Social relationships in the workplace. Mental retardation and developmental disabilities research reviews 7 2 (2001) 128\u2013133.","DOI":"10.1002\/mrdd.1018"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Georgia Chalvatzaki Ali Younes Daljeet Nandha An\u00a0Thai Le Leonardo\u00a0FR Ribeiro and Iryna Gurevych. 2023. Learning to reason over scene graphs: a case study of finetuning GPT-2 into a robot language model for grounded task planning. Frontiers in Robotics and AI 10 (2023) 1221739.","DOI":"10.3389\/frobt.2023.1221739"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.4324\/9780203726655-5"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161534"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606717"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00108"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Mina Cikara Joel\u00a0E Martinez and Neil\u00a0A Lewis\u00a0Jr. 2022. Moving beyond social categories by incorporating context in social psychological theory. Nature Reviews Psychology 1 9 (2022) 537\u2013549.","DOI":"10.1038\/s44159-022-00079-3"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Marcello Cirillo Lars Karlsson and Alessandro Saffiotti. 2010. Human-aware task planning: An application to mobile robots. ACM Transactions on Intelligent Systems and Technology (TIST) 1 2 (2010) 1\u201326.","DOI":"10.1145\/1869397.1869404"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Alan Costall and Ann Richards. 2013. Canonical affordances: The psychology of everyday things. The Oxford handbook of the archaeology of the contemporary world (2013) 82\u201393.","DOI":"10.1093\/oxfordhb\/9780199602001.013.047"},{"key":"e_1_3_3_2_28_2","unstructured":"Murtaza Dalal Tarun Chiruvolu Devendra Chaplot and Ruslan Salakhutdinov. 2024. Plan-seq-learn: Language model guided rl for solving long horizon robotics tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.01534 (2024)."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"crossref","unstructured":"Dayle David Pierre Th\u00e9rouanne and Isabelle Milhabet. 2022. The acceptability of social robots: A scoping review of the recent literature. Computers in Human Behavior 137 (2022) 107419.","DOI":"10.1016\/j.chb.2022.107419"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00182"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460902"},{"key":"e_1_3_3_2_32_2","unstructured":"Danny Driess Fei Xia Mehdi S.\u00a0M. Sajjadi Corey Lynch Aakanksha Chowdhery Brian Ichter Ayzaan Wahid Jonathan Tompson Quan Vuong Tianhe Yu Wenlong Huang Yevgen Chebotar Pierre Sermanet Daniel Duckworth Sergey Levine Vincent Vanhoucke Karol Hausman Marc Toussaint Klaus Greff Andy Zeng Igor Mordatch and Pete Florence. 2023. PaLM-E: An Embodied Multimodal Language Model. arxiv:https:\/\/arXiv.org\/abs\/2303.03378\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2303.03378"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3519911"},{"key":"e_1_3_3_2_34_2","unstructured":"Wanyu Du Zae\u00a0Myung Kim Vipul Raheja Dhruv Kumar and Dongyeop Kang. 2022. Read revise repeat: A system demonstration for human-in-the-loop iterative text revision. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2204.03685 (2022)."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Andrew\u00a0P Duchon Leslie\u00a0Pack Kaelbling and William\u00a0H Warren. 1998. Ecological robotics. Adaptive Behavior 6 3-4 (1998) 473\u2013507.","DOI":"10.1177\/105971239800600306"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3650752"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3585871"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/2501988.2502032"},{"key":"e_1_3_3_2_39_2","unstructured":"Jensen Gao Bidipta Sarkar Fei Xia Ted Xiao Jiajun Wu Brian Ichter Anirudha Majumdar and Dorsa Sadigh. 2024. Physically Grounded Vision-Language Models for Robotic Manipulation. arxiv:https:\/\/arXiv.org\/abs\/2309.02561\u00a0[cs.RO] https:\/\/arxiv.org\/abs\/2309.02561"},{"key":"e_1_3_3_2_40_2","first-page":"1","volume-title":"2023 International Joint Conference on Neural Networks (IJCNN)","author":"Gao Wanting","year":"2023","unstructured":"Wanting Gao, Xinyi Gao, and Yin Tang. 2023. Multi-Turn Dialogue Agent as Sales\u2019 Assistant in Telemarketing. In 2023 International Joint Conference on Neural Networks (IJCNN). IEEE, 1\u20139."},{"key":"e_1_3_3_2_41_2","unstructured":"Arindam Ghosh Amartya Chakraborty Dhruv Chakraborty Mousumi Saha and Sujoy Saha. 2023. UltraSense: A non-intrusive approach for human activity identification using heterogeneous ultrasonic sensor grid for smart home environment. Journal of Ambient Intelligence and Humanized Computing (2023) 1\u201322."},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"James\u00a0J Gibson. 1977. The theory of affordances. Hilldale USA 1 2 (1977) 67\u201382.","DOI":"10.2307\/3171580"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","unstructured":"Sarah Gillet Marynel V\u00e1zquez Sean Andrist Iolanda Leite and Sarah Sebo. 2024. Interaction-Shaping Robotics: Robots That Influence Interactions between Other Agents. J. Hum.-Robot Interact. 13 1 Article 12 (March 2024) 23\u00a0pages. 10.1145\/3643803","DOI":"10.1145\/3643803"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3563657.3596032"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611090"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3319502.3374792"},{"key":"e_1_3_3_2_47_2","unstructured":"Dongge Han Trevor McInroe Adam Jelley Stefano\u00a0V Albrecht Peter Bell and Amos Storkey. 2024. LLM-Personalize: Aligning LLM Planners with Human Preferences via Reinforced Self-Training for Housekeeping Robots. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.14285 (2024)."},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i18.29991"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"crossref","unstructured":"Marcel Heerink Ben Kr\u00f6se Vanessa Evers and Bob Wielinga. 2010. Assessing acceptance of assistive social agent technology by older adults: the almere model.","DOI":"10.1007\/s12369-010-0068-5"},{"key":"e_1_3_3_2_50_2","unstructured":"Yingdong Hu Fanqi Lin Tong Zhang Li Yi and Yang Gao. 2023. Look before you leap: Unveiling the power of gpt-4v in robotic vision-language planning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.17842 (2023)."},{"key":"e_1_3_3_2_51_2","unstructured":"Wenlong Huang Fei Xia Dhruv Shah Danny Driess Andy Zeng Yao Lu Pete Florence Igor Mordatch Sergey Levine Karol Hausman et\u00a0al. 2024. Grounded decoding: Guiding text generation with grounded models for embodied agents. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.4324\/9781003135517-19"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i7.16792"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-77385-4_41"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606793"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/VRW58643.2023.00240"},{"key":"e_1_3_3_2_57_2","unstructured":"Daniel Kahneman. 2011. Thinking Fast and Slow. Farrar Strauss and Giroux (2011)."},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/2207676.2208541"},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"crossref","unstructured":"Laurent Karsenty and Val\u00e9rie Botherel. 2005. Transparency strategies to help users handle system errors. Speech Communication 45 3 (2005) 305\u2013324.","DOI":"10.1016\/j.specom.2004.10.018"},{"key":"e_1_3_3_2_60_2","unstructured":"Junghyun Kim Gi-Cheon Kang Jaein Kim Seoyun Yang Minjoon Jung and Byoung-Tak Zhang. 2023. PGA: Personalizing Grasping Agents with Single Human-Robot Interaction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.12547 (2023)."},{"key":"e_1_3_3_2_61_2","first-page":"295","volume-title":"Proceedings of the AAAI Symposium Series","volume":"2","author":"Knowles Kobe","year":"2023","unstructured":"Kobe Knowles, Michael Witbrock, Gillian Dobbie, and Vithya Yogarajan. 2023. A Proposal for a Language Model Based Cognitive Architecture. In Proceedings of the AAAI Symposium Series , Vol.\u00a02. 295\u2013301."},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3434074.3447138"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v28i1.9032"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"crossref","unstructured":"Ranjay Krishna Yuke Zhu Oliver Groth Justin Johnson Kenji Hata Joshua Kravitz Stephanie Chen Yannis Kalantidis Li-Jia Li David\u00a0A Shamma et\u00a0al. 2017. Visual genome: Connecting language and vision using crowdsourced dense image annotations. International journal of computer vision 123 (2017) 32\u201373.","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/2157689.2157804"},{"key":"e_1_3_3_2_66_2","unstructured":"Bo Li Yuanhan Zhang Liangyu Chen Jinghao Wang Fanyi Pu Jingkang Yang Chunyuan Li and Ziwei Liu. 2023. MIMIC-IT: Multi-Modal In-Context Instruction Tuning. arxiv:https:\/\/arXiv.org\/abs\/2306.05425\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2306.05425"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"crossref","unstructured":"Chunyuan Li Zhe Gan Zhengyuan Yang Jianwei Yang Linjie Li Lijuan Wang Jianfeng Gao et\u00a0al. 2024. Multimodal foundation models: From specialists to general-purpose assistants. Foundations and Trends\u00ae in Computer Graphics and Vision 16 1-2 (2024) 1\u2013214.","DOI":"10.1561\/0600000110"},{"key":"e_1_3_3_2_68_2","unstructured":"Wenhao Li Zhiyuan Yu Qijin She Zhinan Yu Yuqing Lan Chenyang Zhu Ruizhen Hu and Kai Xu. 2024. LLM-enhanced Scene Graph Learning for Household Rearrangement. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.12093 (2024)."},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01265"},{"key":"e_1_3_3_2_70_2","unstructured":"Xingxuan Li Ruochen Zhao Yew\u00a0Ken Chia Bosheng Ding Shafiq Joty Soujanya Poria and Lidong Bing. 2023. Chain-of-knowledge: Grounding large language models via dynamic knowledge adapting over heterogeneous sources. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.13269 (2023)."},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501992"},{"key":"e_1_3_3_2_72_2","unstructured":"Bill\u00a0Yuchen Lin Yicheng Fu Karina Yang Faeze Brahman Shiyu Huang Chandra Bhagavatula Prithviraj Ammanabrolu Yejin Choi and Xiang Ren. 2024. Swiftsage: A generative agent with fast and slow thinking for complex interactive tasks. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376614"},{"key":"e_1_3_3_2_74_2","unstructured":"Yuchen Liu Luigi Palmieri Sebastian Koch Ilche Georgievski and Marco Aiello. 2024. Towards Human Awareness in Robot Task Planning with Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.11267 (2024)."},{"key":"e_1_3_3_2_75_2","first-page":"79","volume-title":"International Conference on Cognitive Systems (CogSys)","author":"L\u00f6rken Christopher","year":"2008","unstructured":"Christopher L\u00f6rken and Joachim Hertzberg. 2008. Grounding planning operators by affordances. In International Conference on Cognitive Systems (CogSys). Citeseer, 79\u201384."},{"key":"e_1_3_3_2_76_2","doi-asserted-by":"crossref","unstructured":"Octavio Loyola-Gonzalez. 2019. Black-box vs. white-box: Understanding their advantages and weaknesses from a practical point of view. IEEE access 7 (2019) 154096\u2013154113.","DOI":"10.1109\/ACCESS.2019.2949286"},{"key":"e_1_3_3_2_77_2","doi-asserted-by":"crossref","unstructured":"Liangsheng Lu Wei Zhai Hongchen Luo Yu Kang and Yang Cao. 2022. Phrase-based affordance detection via cyclic bilateral interaction. IEEE Transactions on Artificial Intelligence 4 5 (2022) 1186\u20131198.","DOI":"10.1109\/TAI.2022.3199190"},{"key":"e_1_3_3_2_78_2","unstructured":"Tom McClelland. 2019. Representing our options: The perception of affordances for bodily and mental action. Journal of Consciousness Studies 26 3-4 (2019) 155\u2013180."},{"key":"e_1_3_3_2_79_2","unstructured":"Joseph\u00a0E Mercado Michael\u00a0A Rupp Jessie\u00a0YC Chen Daniel Barber Katelyn Procci and Michael Barnes. 2015. Effects of agent transparency on multi-robot management effectiveness. Aberdeen Proving Ground (MD): Army Research Laboratory (US) (2015)."},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00100"},{"key":"e_1_3_3_2_81_2","unstructured":"Yao Mu Qinglong Zhang Mengkang Hu Wenhai Wang Mingyu Ding Jun Jin Bin Wang Jifeng Dai Yu Qiao and Ping Luo. 2024. Embodiedgpt: Vision-language pre-training via embodied chain of thought. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_82_2","doi-asserted-by":"crossref","unstructured":"Francisco Munguia-Galeano Satheeshkumar Veeramani Juan\u00a0David Hern\u00e1ndez Qingmeng Wen and Ze Ji. 2023. Affordance-based human\u2013robot interaction with reinforcement learning. IEEE Access 11 (2023) 31282\u201331292.","DOI":"10.1109\/ACCESS.2023.3262450"},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","DOI":"10.1109\/HRI53351.2022.9889456"},{"key":"e_1_3_3_2_84_2","doi-asserted-by":"crossref","unstructured":"Manisha Natarajan Esmaeil Seraj Batuhan Altundas Rohan Paleja Sean Ye Letian Chen Reed Jensen Kimberlee\u00a0Chestnut Chang and Matthew Gombolay. 2023. Human-robot teaming: grand challenges. Current Robotics Reports 4 3 (2023) 81\u2013100.","DOI":"10.1007\/s43154-023-00103-1"},{"key":"e_1_3_3_2_85_2","doi-asserted-by":"publisher","DOI":"10.1093\/oxfordhb\/9780198735410.001.0001"},{"key":"e_1_3_3_2_86_2","unstructured":"Benjamin\u00a0A Newman Pranay Gupta Yonatan Bisk Kris Kitani Henny Admoni and Chris Paxton. 2024. Leveraging Vision and Language Models for Zero-Shot Personalization of Household Multi-Object Rearrangement Tasks. (2024)."},{"key":"e_1_3_3_2_87_2","doi-asserted-by":"crossref","unstructured":"Donald\u00a0A Norman. 1999. Affordance conventions and design. interactions 6 3 (1999) 38\u201343.","DOI":"10.1145\/301153.301168"},{"key":"e_1_3_3_2_88_2","doi-asserted-by":"crossref","unstructured":"Vasilios\u00a0A Orfanos Stavros\u00a0D Kaminaris Panagiotis Papageorgas Dimitrios Piromalis and Dionisis Kandris. 2023. A comprehensive review of IoT networking technologies for smart home automation applications. Journal of Sensor and Actuator Networks 12 2 (2023) 30.","DOI":"10.3390\/jsan12020030"},{"key":"e_1_3_3_2_89_2","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2007.4354046"},{"key":"e_1_3_3_2_90_2","unstructured":"Maithili Patel and Sonia Chernova. 2024. Robot Behavior Personalization from Sparse User Feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.19219 (2024)."},{"key":"e_1_3_3_2_91_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501898"},{"key":"e_1_3_3_2_92_2","unstructured":"S\u00f6ren Pirk Karol Hausman Alexander Toshev and Mohi Khansari. 2020. Modeling Long-horizon Tasks as Sequential Interaction Landscapes. arxiv:https:\/\/arXiv.org\/abs\/2006.04843\u00a0[cs.RO] https:\/\/arxiv.org\/abs\/2006.04843"},{"key":"e_1_3_3_2_93_2","unstructured":"Jielin Qiu Andrea Madotto Zhaojiang Lin Paul\u00a0A Crook Yifan\u00a0Ethan Xu Xin\u00a0Luna Dong Christos Faloutsos Lei Li Babak Damavandi and Seungwhan Moon. 2024. Snapntell: Enhancing entity-centric visual question answering with retrieval augmented multimodal llm. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.04735 (2024)."},{"key":"e_1_3_3_2_94_2","doi-asserted-by":"crossref","unstructured":"Valentin Radu Catherine Tong Sourav Bhattacharya Nicholas\u00a0D Lane Cecilia Mascolo Mahesh\u00a0K Marina and Fahim Kawsar. 2018. Multimodal deep learning for activity and context recognition. Proceedings of the ACM on interactive mobile wearable and ubiquitous technologies 1 4 (2018) 1\u201327.","DOI":"10.1145\/3161174"},{"key":"e_1_3_3_2_95_2","doi-asserted-by":"publisher","unstructured":"Krishan Rana Jesse Haviland Sourav Garg Jad Abou-Chakra Ian\u00a0D. Reid and Niko S\u00fcnderhauf. 2023. SayPlan: Grounding Large Language Models using 3D Scene Graphs for Scalable Task Planning. CoRR abs\/2307.06135 (2023). 10.48550\/arXiv.2307.06135","DOI":"10.48550\/arXiv.2307.06135"},{"key":"e_1_3_3_2_96_2","doi-asserted-by":"crossref","unstructured":"Erik Rietveld Damiaan Denys and Maarten Van\u00a0Westen. 2018. Ecological-enactive cognition as engaging with a field of relevant affordances. The Oxford handbook of 4E cognition 41 (2018) 70.","DOI":"10.1093\/oxfordhb\/9780198735410.013.3"},{"key":"e_1_3_3_2_97_2","doi-asserted-by":"crossref","unstructured":"Lionel\u00a0P Robert\u00a0Jr Rasha Alahmad Connor Esterwood Sangmi Kim Sangseok You Qiaoning Zhang et\u00a0al. 2020. A review of personality in human\u2013robot interactions. Foundations and Trends\u00ae in Information Systems 4 2 (2020) 107\u2013212.","DOI":"10.1561\/2900000018"},{"key":"e_1_3_3_2_98_2","doi-asserted-by":"crossref","unstructured":"Erol \u015eahin Maya Cakmak Mehmet\u00a0R Do\u011far Emre U\u011fur and G\u00f6kt\u00fcrk \u00dc\u00e7oluk. 2007. To afford or not to afford: A new formalization of affordances toward affordance-based robot control. Adaptive Behavior 15 4 (2007) 447\u2013472.","DOI":"10.1177\/1059712307084689"},{"key":"e_1_3_3_2_99_2","doi-asserted-by":"crossref","unstructured":"Joe Saunders Dag\u00a0Sverre Syrdal Kheng\u00a0Lee Koay Nathan Burke and Kerstin Dautenhahn. 2015. \u201cteach me\u2013show me\u201d\u2014end-user personalization of a smart home and companion robot. IEEE Transactions on Human-Machine Systems 46 1 (2015) 27\u201340.","DOI":"10.1109\/THMS.2015.2445105"},{"key":"e_1_3_3_2_100_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICOEI.2019.8862778"},{"key":"e_1_3_3_2_101_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610216"},{"key":"e_1_3_3_2_102_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR56361.2022.9956224"},{"key":"e_1_3_3_2_103_2","doi-asserted-by":"crossref","unstructured":"Bruce Sherin. 2006. Common sense clarified: The role of intuitive knowledge in physics problem solving. Journal of Research in Science Teaching: The Official Journal of the National Association for Research in Science Teaching 43 6 (2006) 535\u2013555.","DOI":"10.1002\/tea.20136"},{"key":"e_1_3_3_2_104_2","doi-asserted-by":"crossref","unstructured":"John Shotter. 1983. Duality of structure\u201d and \u201cintentionality\u201d in an ecological psychology. Journal for the Theory of Social Behaviour 13 1 (1983) 19\u201344.","DOI":"10.1111\/j.1468-5914.1983.tb00460.x"},{"key":"e_1_3_3_2_105_2","unstructured":"Austin Stone Ted Xiao Yao Lu Keerthana Gopalakrishnan Kuang-Huei Lee Quan Vuong Paul Wohlhart Sean Kirmani Brianna Zitkovich Fei Xia et\u00a0al. 2023. Open-world object manipulation using pre-trained vision-language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.00905 (2023)."},{"key":"e_1_3_3_2_106_2","doi-asserted-by":"crossref","unstructured":"Kimberly Stowers Nicholas Kasdaglis Michael\u00a0A Rupp Olivia\u00a0B Newton Jessie\u00a0YC Chen and Michael\u00a0J Barnes. 2020. The IMPACT of agent transparency on human performance. IEEE Transactions on Human-Machine Systems 50 3 (2020) 245\u2013253.","DOI":"10.1109\/THMS.2020.2978041"},{"key":"e_1_3_3_2_107_2","unstructured":"Theodore\u00a0R Sumers Shunyu Yao Karthik Narasimhan and Thomas\u00a0L Griffiths. 2023. Cognitive architectures for language agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.02427 (2023)."},{"key":"e_1_3_3_2_108_2","unstructured":"Ron Sun. 2024. Can A Cognitive Architecture Fundamentally Enhance LLMs? Or Vice Versa? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.10444 (2024)."},{"key":"e_1_3_3_2_109_2","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2007.4415252"},{"key":"e_1_3_3_2_110_2","doi-asserted-by":"crossref","unstructured":"Sam Thellman and Tom Ziemke. 2021. The perceptual belief problem: Why explainability is a tough challenge in social robotics. ACM Transactions on Human-Robot Interaction (THRI) 10 3 (2021) 1\u201315.","DOI":"10.1145\/3461781"},{"key":"e_1_3_3_2_111_2","unstructured":"Ludger Van\u00a0Dijk and Erik Rietveld. 2020. Situated imagination. Phenomenology and the Cognitive Sciences (2020) 1\u201323."},{"key":"e_1_3_3_2_112_2","doi-asserted-by":"crossref","unstructured":"Denny Vrande\u010di\u0107 and Markus Kr\u00f6tzsch. 2014. Wikidata: a free collaborative knowledgebase. Commun. ACM 57 10 (2014) 78\u201385.","DOI":"10.1145\/2629489"},{"key":"e_1_3_3_2_113_2","unstructured":"Tu Vu Mohit Iyyer Xuezhi Wang Noah Constant Jerry Wei Jason Wei Chris Tar Yun-Hsuan Sung Denny Zhou Quoc Le et\u00a0al. 2023. Freshllms: Refreshing large language models with search engine augmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.03214 (2023)."},{"key":"e_1_3_3_2_114_2","doi-asserted-by":"crossref","unstructured":"Peter\u00a0C Wason and J\u00a0St\u00a0BT Evans. 1974. Dual processes in reasoning? Cognition 3 2 (1974) 141\u2013154.","DOI":"10.1016\/0010-0277(74)90017-1"},{"key":"e_1_3_3_2_115_2","doi-asserted-by":"crossref","unstructured":"Elke\u00a0U Weber Sara\u00a0M Constantino and Maja Schl\u00fcter. 2023. Embedding cognition: judgment and choice in an interdependent and dynamic world. Current Directions in Psychological Science 32 4 (2023) 328\u2013336.","DOI":"10.1177\/09637214231159282"},{"key":"e_1_3_3_2_116_2","doi-asserted-by":"crossref","unstructured":"Julia\u00a0L Wright Jessie\u00a0YC Chen and Shan\u00a0G Lakhmani. 2019. Agent transparency and reliability in human\u2013robot interaction: The influence on user confidence and perceived reliability. IEEE Transactions on Human-Machine Systems 50 3 (2019) 254\u2013263.","DOI":"10.1109\/THMS.2019.2925717"},{"key":"e_1_3_3_2_117_2","doi-asserted-by":"crossref","unstructured":"Jimmy Wu Rika Antonova Adam Kan Marion Lepert Andy Zeng Shuran Song Jeannette Bohg Szymon Rusinkiewicz and Thomas Funkhouser. 2023. Tidybot: Personalized robot assistance with large language models. Autonomous Robots 47 8 (2023) 1087\u20131102.","DOI":"10.1007\/s10514-023-10139-z"},{"key":"e_1_3_3_2_118_2","doi-asserted-by":"publisher","DOI":"10.1145\/3334480.3383170"},{"key":"e_1_3_3_2_119_2","volume-title":"RSS 2023 Workshop on Learning for Task and Motion Planning","author":"Wu Zhanxin","year":"2023","unstructured":"Zhanxin Wu, Bo Ai, and David Hsu. 2023. Integrating Common Sense and Planning with Large Language Models for Room Tidying. In RSS 2023 Workshop on Learning for Task and Motion Planning."},{"key":"e_1_3_3_2_120_2","unstructured":"Zhiheng Xi Wenxiang Chen Xin Guo Wei He Yiwen Ding Boyang Hong Ming Zhang Junzhe Wang Senjie Jin Enyu Zhou et\u00a0al. 2023. The rise and potential of large language model based agents: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.07864 (2023)."},{"key":"e_1_3_3_2_121_2","doi-asserted-by":"publisher","DOI":"10.5954\/ICAROB.2024.OS15-4"},{"key":"e_1_3_3_2_122_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173652"},{"key":"e_1_3_3_2_123_2","doi-asserted-by":"crossref","unstructured":"Jing Yang Nade Liang Brandon\u00a0J Pitts Kwaku\u00a0O Prakah-Asante Reates Curry Mike Blommer Radhakrishnan Swaminathan and Denny Yu. 2023. Multimodal sensing and computational intelligence for situation awareness classification in autonomous driving. IEEE Transactions on Human-Machine Systems 53 2 (2023) 270\u2013281.","DOI":"10.1109\/THMS.2023.3234429"},{"key":"e_1_3_3_2_124_2","unstructured":"Yuhang Zang Wei Li Jun Han Kaiyang Zhou and Chen\u00a0Change Loy. 2024. Contextual object detection with multimodal large language models. International Journal of Computer Vision (2024) 1\u201319."},{"key":"e_1_3_3_2_125_2","doi-asserted-by":"crossref","unstructured":"Hongbo Zhang Junying Chen Feng Jiang Fei Yu Zhihong Chen Jianquan Li Guiming Chen Xiangbo Wu Zhiyi Zhang Qingying Xiao et\u00a0al. 2023. Huatuogpt towards taming language model to be a doctor. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.15075 (2023).","DOI":"10.18653\/v1\/2023.findings-emnlp.725"},{"key":"e_1_3_3_2_126_2","unstructured":"Yi-Fan Zhang Huanyu Zhang Haochen Tian Chaoyou Fu Shuangqing Zhang Junfei Wu Feng Li Kun Wang Qingsong Wen Zhang Zhang Liang Wang Rong Jin and Tieniu Tan. 2024. MME-RealWorld: Could Your Multimodal LLM Challenge High-Resolution Real-World Scenarios that are Difficult for Humans? arxiv:https:\/\/arXiv.org\/abs\/2408.13257\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2408.13257"},{"key":"e_1_3_3_2_127_2","unstructured":"Wangchunshu Zhou Yuchen\u00a0Eleanor Jiang Long Li Jialong Wu Tiannan Wang Shi Qiu Jintian Zhang Jing Chen Ruipu Wu Shuai Wang et\u00a0al. 2023. Agents: An open-source framework for autonomous language agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.07870 (2023)."},{"key":"e_1_3_3_2_128_2","unstructured":"Yuqi Zhu Shuofei Qiao Yixin Ou Shumin Deng Ningyu Zhang Shiwei Lyu Yue Shen Lei Liang Jinjie Gu and Huajun Chen. 2024. Knowagent: Knowledge-augmented planning for llm-based agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.03101 (2024)."},{"key":"e_1_3_3_2_129_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561548"}],"event":{"name":"CHI 2025: CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713791","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706598.3713791","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:49Z","timestamp":1750295929000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713791"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":128,"alternative-id":["10.1145\/3706598.3713791","10.1145\/3706598"],"URL":"https:\/\/doi.org\/10.1145\/3706598.3713791","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}