{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T11:20:31Z","timestamp":1773141631092,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T00:00:00Z","timestamp":1710115200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,11]]},"DOI":"10.1145\/3610977.3634930","type":"proceedings-article","created":{"date-parts":[[2024,3,10]],"date-time":"2024-03-10T00:19:00Z","timestamp":1710029940000},"page":"572-581","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Preference-Conditioned Language-Guided Abstraction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8136-6175","authenticated-orcid":false,"given":"Andi","family":"Peng","sequence":"first","affiliation":[{"name":"MIT, Cambridge, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9507-7427","authenticated-orcid":false,"given":"Andreea","family":"Bobu","sequence":"additional","affiliation":[{"name":"Boston Dynamics AI Institute, Cambridge, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9961-180X","authenticated-orcid":false,"given":"Belinda Z.","family":"Li","sequence":"additional","affiliation":[{"name":"MIT, Cambridge, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6128-0291","authenticated-orcid":false,"given":"Theodore R.","family":"Sumers","sequence":"additional","affiliation":[{"name":"Princeton, Princeton, NJ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4121-7479","authenticated-orcid":false,"given":"Ilia","family":"Sucholutsky","sequence":"additional","affiliation":[{"name":"Princeton, Princeton, NJ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9291-3728","authenticated-orcid":false,"given":"Nishanth","family":"Kumar","sequence":"additional","affiliation":[{"name":"MIT, Cambridge, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5138-7255","authenticated-orcid":false,"given":"Thomas L.","family":"Griffiths","sequence":"additional","affiliation":[{"name":"Princeton, Princeton, NJ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1338-8107","authenticated-orcid":false,"given":"Julie A.","family":"Shah","sequence":"additional","affiliation":[{"name":"MIT, Cambridge, MA, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,3,11]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Agent-agnostic human-in-the-loop reinforcement learning. arXiv preprint arXiv:1701.04079","author":"Abel David","year":"2017","unstructured":"David Abel, John Salvatier, Andreas Stuhlm\u00fcller, and Owain Evans. 2017. Agent-agnostic human-in-the-loop reinforcement learning. arXiv preprint arXiv:1701.04079 (2017)."},{"key":"e_1_3_2_2_2_1","unstructured":"Gati Aher Rosa I. Arriaga and Adam Tauman Kalai. 2023. Using Large Language Models to Simulate Multiple Humans and Replicate Human Subject Studies. arXiv:2208.10264 [cs.CL]"},{"key":"e_1_3_2_2_3_1","unstructured":"Michael Ahn Anthony Brohan Noah Brown Yevgen Chebotar Omar Cortes Byron David Chelsea Finn Keerthana Gopalakrishnan Karol Hausman Alex Herzog et al. 2022. Do as i can not as i say: Grounding language in robotic affordances. arXiv preprint arXiv:2204.01691 (2022)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1017\/pan.2023.2"},{"key":"e_1_3_2_2_5_1","volume-title":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback. ArXiv abs\/2204.05862","author":"Bai Yuntao","year":"2022","unstructured":"Yuntao Bai, Andy Jones, Kamal Ndousse, Amanda Askell, Anna Chen, Nova DasSarma, Dawn Drain, Stanislav Fort, Deep Ganguli, T. J. Henighan, Nicholas Joseph, Saurav Kadavath, John Kernion, Tom Conerly, Sheer El-Showk, Nelson Elhage, Zac Hatfield-Dodds, Danny Hernandez, Tristan Hume, Scott Johnston, Shauna Kravec, Liane Lovitt, Neel Nanda, Catherine Olsson, Dario Amodei, Tom B. Brown, Jack Clark, Sam McCandlish, Christopher Olah, Benjamin Mann, and Jared Kaplan. 2022. Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback. ArXiv abs\/2204.05862 (2022). https:\/\/api. semanticscholar.org\/CorpusID:248118878"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3196164"},{"key":"e_1_3_2_2_7_1","volume-title":"Aligning Robot and Human Representations. arXiv preprint arXiv:2302.01928","author":"Bobu Andreea","year":"2023","unstructured":"Andreea Bobu, Andi Peng, Pulkit Agrawal, Julie Shah, and Anca D Dragan. 2023. Aligning Robot and Human Representations. arXiv preprint arXiv:2302.01928 (2023)."},{"key":"e_1_3_2_2_8_1","volume-title":"Conference on robot learning. PMLR, 330--359","author":"Brown Daniel S","year":"2020","unstructured":"Daniel S Brown, Wonjoon Goo, and Scott Niekum. 2020. Better-than- demonstrator imitation learning via automatically-ranked demonstrations. In Conference on robot learning. PMLR, 330--359."},{"key":"e_1_3_2_2_9_1","volume-title":"Lin (Eds.)","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ran- zato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 1877--1901. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/ 1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461012"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2157689.2157693"},{"key":"e_1_3_2_2_12_1","volume-title":"2010 5th ACM\/IEEE International Conference on Human- Robot Interaction (HRI). IEEE, 317--324","author":"Chao Crystal","year":"2010","unstructured":"Crystal Chao, Maya Cakmak, and Andrea L Thomaz. 2010. Transparent active learning for robots. In 2010 5th ACM\/IEEE International Conference on Human- Robot Interaction (HRI). IEEE, 317--324."},{"key":"e_1_3_2_2_13_1","volume-title":"Deep reinforcement learning from human preferences. Advances in neural information processing systems 30","author":"Christiano Paul F","year":"2017","unstructured":"Paul F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep reinforcement learning from human preferences. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_14_1","volume-title":"Garnett (Eds.)","volume":"30","author":"Christiano Paul F","year":"2017","unstructured":"Paul F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep Reinforcement Learning from Human Preferences. In Ad- vances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Ben- gio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Cur- ran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/ d5e2c0adad503c91f91df240d0cd4e49-Paper.pdf"},{"key":"e_1_3_2_2_15_1","unstructured":"Hyung Won Chung Le Hou Shayne Longpre Barret Zoph Yi Tay William Fedus Yunxuan Li Xuezhi Wang Mostafa Dehghani Siddhartha Brahma Albert Web- son Shixiang Shane Gu Zhuyun Dai Mirac Suzgun Xinyun Chen Aakanksha Chowdhery Alex Castro-Ros Marie Pellat Kevin Robinson Dasha Valter Sharan Narang Gaurav Mishra Adams Yu Vincent Zhao Yanping Huang Andrew Dai Hongkun Yu Slav Petrov Ed H. Chi Jeff Dean Jacob Devlin Adam Roberts Denny Zhou Quoc V. Le and Jason Wei. 2022. Scaling Instruction-Finetuned Language Models. arXiv:2210.11416 [cs.LG]"},{"key":"e_1_3_2_2_16_1","volume-title":"Guiding policies with language via meta-learning. arXiv preprint arXiv:1811.07882","author":"Co-Reyes John D","year":"2018","unstructured":"John D Co-Reyes, Abhishek Gupta, Suvansh Sanjeev, Nick Altieri, Jacob Andreas, John DeNero, Pieter Abbeel, and Sergey Levine. 2018. Guiding policies with language via meta-learning. arXiv preprint arXiv:1811.07882 (2018)."},{"key":"e_1_3_2_2_17_1","volume-title":"Humans decompose tasks by trading off utility and computational cost. arXiv preprint arXiv:2211.03890","author":"Correa Carlos G","year":"2022","unstructured":"Carlos G Correa, Mark K Ho, Frederick Callaway, Nathaniel D Daw, and Thomas L Griffiths. 2022. Humans decompose tasks by trading off utility and computational cost. arXiv preprint arXiv:2211.03890 (2022)."},{"key":"e_1_3_2_2_18_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2023.04.008"},{"key":"e_1_3_2_2_20_1","unstructured":"Deep Ganguli Liane Lovitt Jackson Kernion Amanda Askell Yuntao Bai Saurav Kadavath Ben Mann Ethan Perez Nicholas Schiefer Kamal Ndousse Andy Jones Sam Bowman Anna Chen Tom Conerly Nova DasSarma Dawn Drain Nelson Elhage Sheer El-Showk Stanislav Fort Zac Hatfield-Dodds Tom Henighan Danny Hernandez Tristan Hume Josh Jacobson Scott Johnston Shauna Kravec Catherine Olsson Sam Ringer Eli Tran-Johnson Dario Amodei Tom Brown Nicholas Joseph Sam McCandlish Chris Olah Jared Kaplan and Jack Clark. 2022. Red Teaming Language Models to Reduce Harms: Methods Scaling Behaviors and Lessons Learned. arXiv:2209.07858 [cs.CL]"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0166-"},{"key":"e_1_3_2_2_22_1","volume-title":"People construct simplified mental representations to plan. Nature 606, 7912","author":"Ho Mark K","year":"2022","unstructured":"Mark K Ho, David Abel, Carlos G Correa, Michael L Littman, Jonathan D Cohen, and Thomas L Griffiths. 2022. People construct simplified mental representations to plan. Nature 606, 7912 (2022), 129--136."},{"key":"e_1_3_2_2_23_1","unstructured":"Mark K Ho Jonathan D Cohen and Tom Griffiths. 2023. Rational simplification and rigidity in human planning. (2023)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.806"},{"key":"e_1_3_2_2_25_1","volume-title":"Lan- guage Models as Zero-Shot Planners: Extracting Actionable Knowledge for Em- bodied Agents. arXiv preprint arXiv:2201.07207","author":"Huang Wenlong","year":"2022","unstructured":"Wenlong Huang, Pieter Abbeel, Deepak Pathak, and Igor Mordatch. 2022. Lan- guage Models as Zero-Shot Planners: Extracting Actionable Knowledge for Em- bodied Agents. arXiv preprint arXiv:2201.07207 (2022)."},{"key":"e_1_3_2_2_26_1","volume-title":"et al","author":"Huang Wenlong","year":"2022","unstructured":"Wenlong Huang, Fei Xia, Ted Xiao, Harris Chan, Jacky Liang, Pete Florence, Andy Zeng, Jonathan Tompson, Igor Mordatch, Yevgen Chebotar, et al . 2022. Inner monologue: Embodied reasoning through planning with language models. arXiv preprint arXiv:2207.05608 (2022)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cognition.2023.105414"},{"key":"e_1_3_2_2_28_1","volume-title":"Lin (Eds.)","volume":"33","author":"Jeon Hong Jun","year":"2020","unstructured":"Hong Jun Jeon, Smitha Milli, and Anca Dragan. 2020. Reward-rational (implicit) choice: A unifying formalism for reward learning. In Advances in Neural Informa- tion Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 4415--4426. https:\/\/proceedings. neurips.cc\/paper\/2020\/file\/2f10c1578a0706e06b6d7db6f0b4a6af-Paper.pdf"},{"key":"e_1_3_2_2_29_1","volume-title":"GenRec: Large Language Model for Generative Recommendation. ArXiv abs\/2307.00457","author":"Ji Jianchao","year":"2023","unstructured":"Jianchao Ji, Zelong Li, Shuyuan Xu, Wenyue Hua, Yingqiang Ge, Juntao Tan, and Yongfeng Zhang. 2023. GenRec: Large Language Model for Generative Recommendation. ArXiv abs\/2307.00457 (2023). https:\/\/api.semanticscholar.org\/ CorpusID:259332879"},{"key":"e_1_3_2_2_30_1","volume-title":"Vima: General robot manipulation with multimodal prompts. arXiv preprint arXiv:2210.03094","author":"Jiang Yunfan","year":"2022","unstructured":"Yunfan Jiang, Agrim Gupta, Zichen Zhang, Guanzhi Wang, Yongqiang Dou, Yanjun Chen, Li Fei-Fei, Anima Anandkumar, Yuke Zhu, and Linxi Fan. 2022. Vima: General robot manipulation with multimodal prompts. arXiv preprint arXiv:2210.03094 (2022)."},{"key":"e_1_3_2_2_31_1","volume-title":"et al","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander C Berg, Wan-Yen Lo, et al . 2023. Segment anything. arXiv preprint arXiv:2304.02643 (2023)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2008.4640845"},{"key":"e_1_3_2_2_33_1","volume-title":"Kalesha Bullard, and Dorsa Sadigh.","author":"Kwon Minae","year":"2023","unstructured":"Minae Kwon, Sang Michael Xie, Kalesha Bullard, and Dorsa Sadigh. 2023. Reward design with language models. arXiv preprint arXiv:2303.00001 (2023)."},{"key":"e_1_3_2_2_34_1","unstructured":"Belinda Z. Li William Chen Pratyusha Sharma and Jacob Andreas. 2023. LaMPP: Language Models as Probabilistic Priors for Perception and Action. arXiv:2302.02801 [cs.LG]"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16--1094"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.585"},{"key":"e_1_3_2_2_37_1","volume-title":"LLM- Rec: Personalized Recommendation via Prompting Large Language Models. ArXiv abs\/2307.15780","author":"Lyu Hanjia","year":"2023","unstructured":"Hanjia Lyu, Song Jiang, Hanqing Zeng, Yinglong Xia, and Jiebo Luo. 2023. LLM- Rec: Personalized Recommendation via Prompting Large Language Models. ArXiv abs\/2307.15780 (2023). https:\/\/api.semanticscholar.org\/CorpusID:260334587 HRI '24, March 11--14, 2024, Boulder, CO, USA Peng et al."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462828"},{"key":"e_1_3_2_2_39_1","volume-title":"International Conference on Machine Learning. PMLR, 2285--2294","author":"MacGlashan James","year":"2017","unstructured":"James MacGlashan, Mark K Ho, Robert Loftin, Bei Peng, Guan Wang, David L Roberts, Matthew E Taylor, and Michael L Littman. 2017. Interactive learning from policy-dependent human feedback. In International Conference on Machine Learning. PMLR, 2285--2294."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-short.100"},{"key":"e_1_3_2_2_41_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL]"},{"key":"e_1_3_2_2_42_1","volume-title":"Oh (Eds.)","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, John Schulman, Jacob Hilton, Fraser Kelton, Luke Miller, Maddie Simens, Amanda Askell, Peter Welinder, Paul F Christiano, Jan Leike, and Ryan Lowe. 2022. Training language models to follow instructions with human feedback. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 27730--27744. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/ b1efde53be364a73914f58805a001731-Paper-Conference.pdf"},{"key":"e_1_3_2_2_43_1","volume-title":"Feedback, Adaptation: A Human-in-the- Loop Framework for Test-Time Policy Adaptation.","author":"Peng Andi","year":"2023","unstructured":"Andi Peng, Aviv Netanyahu, Mark K Ho, Tianmin Shu, Andreea Bobu, Julie Shah, and Pulkit Agrawal. 2023. Diagnosis, Feedback, Adaptation: A Human-in-the- Loop Framework for Test-Time Policy Adaptation. (2023)."},{"key":"e_1_3_2_2_44_1","unstructured":"Andi Peng Ilia Sucholutsky Belinda Li Theodore Sumers Thomas Griffiths Jacob Andreas and Julie Shah. [n. d.]. Learning with Language-Guided State Abstractions. ([n. d.])."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/595"},{"key":"e_1_3_2_2_46_1","volume-title":"Liu","author":"Raffel Colin","year":"2019","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2019. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. arXiv e-prints (2019). arXiv:1910.10683"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2304.10782"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.120"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/721"},{"key":"e_1_3_2_2_50_1","volume-title":"Lin (Eds.)","volume":"33","author":"Stiennon Nisan","year":"2020","unstructured":"Nisan Stiennon, Long Ouyang, Jeffrey Wu, Daniel Ziegler, Ryan Lowe, Chelsea Voss, Alec Radford, Dario Amodei, and Paul F Christiano. 2020. Learning to summarize with human feedback. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 3008--3021. https:\/\/proceedings.neurips.cc\/ paper_files\/paper\/2020\/file\/1f89885d556929e98d3ef9b86448f951-Paper.pdf"},{"key":"e_1_3_2_2_51_1","volume-title":"Voyager: An open-ended embodied agent with large language models. arXiv preprint arXiv:2305.16291","author":"Wang Guanzhi","year":"2023","unstructured":"Guanzhi Wang, Yuqi Xie, Yunfan Jiang, Ajay Mandlekar, Chaowei Xiao, Yuke Zhu, Linxi Fan, and Anima Anandkumar. 2023. Voyager: An open-ended embodied agent with large language models. arXiv preprint arXiv:2305.16291 (2023)."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539382"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"e_1_3_2_2_54_1","volume-title":"TidyBot: Personalized Robot Assistance with Large Language Models. Autonomous Robots","author":"Wu Jimmy","year":"2023","unstructured":"Jimmy Wu, Rika Antonova, Adam Kan, Marion Lepert, Andy Zeng, Shuran Song, Jeannette Bohg, Szymon Rusinkiewicz, and Thomas Funkhouser. 2023. TidyBot: Personalized Robot Assistance with Large Language Models. Autonomous Robots (2023)."},{"key":"e_1_3_2_2_55_1","volume-title":"A Survey on Large Language Models for Recommendation. ArXiv abs\/2305.19860","author":"Wu Likang","year":"2023","unstructured":"Likang Wu, Zhilan Zheng, Zhaopeng Qiu, Hao Wang, Hongchao Gu, Tingjia Shen, Chuan Qin, Chen Zhu, Hengshu Zhu, Qi Liu, Hui Xiong, and Enhong Chen. 2023. A Survey on Large Language Models for Recommendation. ArXiv abs\/2305.19860 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258987581"},{"key":"e_1_3_2_2_56_1","volume-title":"Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language. In The Eleventh International Conference on Learning Representations. https:\/\/openreview. net\/forum?id=G2Q2Mh3avow","author":"Zeng Andy","year":"2023","unstructured":"Andy Zeng, Maria Attarian, brian ichter, Krzysztof Marcin Choromanski, Adrian Wong, Stefan Welker, Federico Tombari, Aveek Purohit, Michael S Ryoo, Vikas Sindhwani, Johnny Lee, Vincent Vanhoucke, and Pete Florence. 2023. Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language. In The Eleventh International Conference on Learning Representations. https:\/\/openreview. net\/forum?id=G2Q2Mh3avow"},{"key":"e_1_3_2_2_57_1","volume-title":"Leveraging human guidance for deep reinforcement learning tasks. arXiv preprint arXiv:1909.09906","author":"Zhang Ruohan","year":"2019","unstructured":"Ruohan Zhang, Faraz Torabi, Lin Guan, Dana H Ballard, and Peter Stone. 2019. Leveraging human guidance for deep reinforcement learning tasks. arXiv preprint arXiv:1909.09906 (2019)."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18--1205"},{"key":"e_1_3_2_2_59_1","unstructured":"Shengyu Zhang Linfeng Dong Xiaoya Li Sen Zhang Xiaofei Sun Shuhe Wang Jiwei Li Runyi Hu Tianwei Zhang Fei Wu and Guoyin Wang. 2023. Instruction Tuning for Large Language Models: A Survey. arXiv:2308.10792 [cs.CL]"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.426"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_21"},{"key":"e_1_3_2_2_62_1","volume-title":"Evaluating Commonsense in Pre-trained Language Models. In AAAI Conference on Artificial Intelligence. https:\/\/api.semanticscholar.org\/CorpusID:208310123","author":"Zhou Xuhui","year":"2019","unstructured":"Xuhui Zhou, Yue Zhang, Leyang Cui, and Dandan Huang. 2019. Evaluating Commonsense in Pre-trained Language Models. In AAAI Conference on Artificial Intelligence. https:\/\/api.semanticscholar.org\/CorpusID:208310123"},{"key":"e_1_3_2_2_63_1","unstructured":"Daniel M. Ziegler Nisan Stiennon Jeffrey Wu Tom B. Brown Alec Radford Dario Amodei Paul Christiano and Geoffrey Irving. 2020. Fine-Tuning Language Models from Human Preferences. arXiv:1909.08593 [cs.CL]"}],"event":{"name":"HRI '24: ACM\/IEEE International Conference on Human-Robot Interaction","location":"Boulder CO USA","acronym":"HRI '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2024 ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610977.3634930","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3610977.3634930","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,28]],"date-time":"2025-08-28T16:34:23Z","timestamp":1756398863000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610977.3634930"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,11]]},"references-count":63,"alternative-id":["10.1145\/3610977.3634930","10.1145\/3610977"],"URL":"https:\/\/doi.org\/10.1145\/3610977.3634930","relation":{},"subject":[],"published":{"date-parts":[[2024,3,11]]},"assertion":[{"value":"2024-03-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}