{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T11:19:21Z","timestamp":1773141561783,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":78,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T00:00:00Z","timestamp":1710115200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Swedish Foundation for Strategic Research","award":["SSF FFL18-0199"],"award-info":[{"award-number":["SSF FFL18-0199"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,11]]},"DOI":"10.1145\/3610977.3634970","type":"proceedings-article","created":{"date-parts":[[2024,3,10]],"date-time":"2024-03-10T00:19:00Z","timestamp":1710029940000},"page":"259-268","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["PREDILECT: Preferences Delineated with Zero-Shot Language-based Reasoning in Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5727-8140","authenticated-orcid":false,"given":"Simon","family":"Holk","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3510-5481","authenticated-orcid":false,"given":"Daniel","family":"Marta","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2212-4325","authenticated-orcid":false,"given":"Iolanda","family":"Leite","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]}],"member":"320","published-online":{"date-parts":[[2024,3,11]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Repeated inverse 
reinforcement learning. Advances in neural information processing systems","author":"Amin Kareem","year":"2017","unstructured":"Kareem Amin, Nan Jiang, and Satinder Singh. 2017. Repeated inverse reinforcement learning. Advances in neural information processing systems , Vol. 30 (2017)."},{"key":"e_1_3_2_2_2_1","volume-title":"Concrete problems in AI safety. arXiv preprint arXiv:1606.06565","author":"Amodei Dario","year":"2016","unstructured":"Dario Amodei, Chris Olah, Jacob Steinhardt, Paul Christiano, John Schulman, and Dan Man\u00e9. 2016. Concrete problems in AI safety. arXiv preprint arXiv:1606.06565 (2016)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3171221.3171284"},{"key":"e_1_3_2_2_4_1","unstructured":"Erik B\u00e5venstrand and Jakob Berggren. 2019. Performance evaluation of imitation learning algorithms with human experts."},{"key":"e_1_3_2_2_5_1","volume-title":"Preference learning along multiple criteria: A game-theoretic perspective. Advances in neural information processing systems","author":"Bhatia Kush","year":"2020","unstructured":"Kush Bhatia, Ashwin Pananjady, Peter Bartlett, Anca Dragan, and Martin J Wainwright. 2020. Preference learning along multiple criteria: A game-theoretic perspective. Advances in neural information processing systems , Vol. 33 (2020), 7413--7424."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Erdem Biyik Nicolas Huynh Mykel J Kochenderfer and Dorsa Sadigh. 2020. Active preference-based gaussian process regression for reward learning. In Robotics: Science and Systems (RSS).","DOI":"10.15607\/RSS.2020.XVI.041"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3319502.3374811"},{"key":"e_1_3_2_2_8_1","unstructured":"Rishi Bommasani Drew A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill et al. 2021. On the opportunities and risks of foundation models. 
arXiv preprint arXiv:2108.07258 (2021)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/3523760.3523784"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"},{"key":"e_1_3_2_2_11_1","volume-title":"Openai gym. arXiv preprint arXiv:1606.01540","author":"Brockman Greg","year":"2016","unstructured":"Greg Brockman, Vicki Cheung, Ludwig Pettersson, Jonas Schneider, John Schulman, Jie Tang, and Wojciech Zaremba. 2016. Openai gym. arXiv preprint arXiv:1606.01540 (2016)."},{"key":"e_1_3_2_2_12_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877--1901."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6094735"},{"key":"e_1_3_2_2_14_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al.","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al. 2021. Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374 (2021)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5763"},{"key":"e_1_3_2_2_16_1","volume-title":"Deep reinforcement learning from human preferences. Advances in neural information processing systems","author":"Christiano Paul F","year":"2017","unstructured":"Paul F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep reinforcement learning from human preferences. Advances in neural information processing systems , Vol. 
30 (2017)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460854"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2800101"},{"key":"e_1_3_2_2_19_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Haan Pim De","year":"2019","unstructured":"Pim De Haan, Dinesh Jayaraman, and Sergey Levine. 2019. Causal confusion in imitation learning. Advances in Neural Information Processing Systems , Vol. 32 (2019)."},{"key":"e_1_3_2_2_20_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/s41060-016-0038-6"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2956365"},{"key":"e_1_3_2_2_23_1","unstructured":"Anthony Francis Claudia P\u00e9rez-d'Arpino Chengshu Li Fei Xia Alexandre Alahi Rachid Alami Aniket Bera Abhijat Biswas Joydeep Biswas Rohan Chandra et al. 2023. Principles and guidelines for evaluating social robot navigation algorithms. arXiv preprint arXiv:2306.16740 (2023)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.313"},{"key":"e_1_3_2_2_25_1","volume-title":"Inverse reward design. Advances in neural information processing systems","author":"Hadfield-Menell Dylan","year":"2017","unstructured":"Dylan Hadfield-Menell, Smitha Milli, Pieter Abbeel, Stuart J Russell, and Anca Dragan. 2017. Inverse reward design. Advances in neural information processing systems , Vol. 30 (2017)."},{"key":"e_1_3_2_2_26_1","volume-title":"Conference on Robot Learning. 
PMLR","author":"Dorsa Sadigh Donald Joseph","year":"2023","unstructured":"Donald Joseph Hejna III and Dorsa Sadigh. 2023. Few-shot preference learning for human-in-the-loop rl. In Conference on Robot Learning. PMLR, 2014--2025."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.51.4282"},{"key":"e_1_3_2_2_28_1","volume-title":"Large language models are reasoning teachers. arXiv preprint arXiv:2212.10071","author":"Ho Namgyu","year":"2022","unstructured":"Namgyu Ho, Laura Schmid, and Se-Young Yun. 2022. Large language models are reasoning teachers. arXiv preprint arXiv:2212.10071 (2022)."},{"key":"e_1_3_2_2_29_1","volume-title":"Jaime Ferrando Huertas, and Dino Sejdinovic.","author":"Hu Robert","year":"2022","unstructured":"Robert Hu, Siu Lun Chau, Jaime Ferrando Huertas, and Dino Sejdinovic. 2022. Explaining Preferences with Shapley Values. In Advances in Neural Information Processing Systems, Alice H. Oh, Alekh Agarwal, Danielle Belgrave, and Kyunghyun Cho (Eds.). https:\/\/openreview.net\/forum?id=-me36V0os8P"},{"key":"e_1_3_2_2_30_1","volume-title":"Reward learning from human preferences and demonstrations in atari. Advances in neural information processing systems","author":"Ibarz Borja","year":"2018","unstructured":"Borja Ibarz, Jan Leike, Tobias Pohlen, Geoffrey Irving, Shane Legg, and Dario Amodei. 2018. Reward learning from human preferences and demonstrations in atari. Advances in neural information processing systems , Vol. 31 (2018)."},{"key":"e_1_3_2_2_31_1","volume-title":"Tom Schaul, Joel Z Leibo, David Silver, and Koray Kavukcuoglu.","author":"Jaderberg Max","year":"2016","unstructured":"Max Jaderberg, Volodymyr Mnih, Wojciech Marian Czarnecki, Tom Schaul, Joel Z Leibo, David Silver, and Koray Kavukcuoglu. 2016. Reinforcement learning with unsupervised auxiliary tasks. 
arXiv preprint arXiv:1611.05397 (2016)."},{"key":"e_1_3_2_2_32_1","first-page":"4415","article-title":"Reward-rational (implicit) choice: A unifying formalism for reward learning","volume":"33","author":"Jeon Hong Jun","year":"2020","unstructured":"Hong Jun Jeon, Smitha Milli, and Anca Dragan. 2020. Reward-rational (implicit) choice: A unifying formalism for reward learning. Advances in Neural Information Processing Systems , Vol. 33 (2020), 4415--4426.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_33_1","volume-title":"International Conference on Human-Robot Interaction (HRI).","author":"Khayrallah Huda","year":"2015","unstructured":"Huda Khayrallah, Sean Trott, and Jerome Feldman. 2015. Natural language for human robot interaction. In International Conference on Human-Robot Interaction (HRI)."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/1597735.1597738"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-02675-6_46"},{"key":"e_1_3_2_2_36_1","volume-title":"Unsupervised learning of object keypoints for perception and control. Advances in neural information processing systems","author":"Kulkarni Tejas D","year":"2019","unstructured":"Tejas D Kulkarni, Ankush Gupta, Catalin Ionescu, Sebastian Borgeaud, Malcolm Reynolds, Andrew Zisserman, and Volodymyr Mnih. 2019. Unsupervised learning of object keypoints for perception and control. Advances in neural information processing systems , Vol. 32 (2019)."},{"key":"e_1_3_2_2_37_1","first-page":"167","article-title":"Using natural language and program abstractions to instill human inductive biases in machines","volume":"35","author":"Kumar Sreejan","year":"2022","unstructured":"Sreejan Kumar, Carlos G Correa, Ishita Dasgupta, Raja Marjieh, Michael Y Hu, Robert Hawkins, Jonathan D Cohen, Karthik Narasimhan, Tom Griffiths, et al. 2022. Using natural language and program abstractions to instill human inductive biases in machines. 
Advances in Neural Information Processing Systems , Vol. 35 (2022), 167--180.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_38_1","volume-title":"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training. arXiv preprint arXiv:2106.05091","author":"Lee Kimin","year":"2021","unstructured":"Kimin Lee, Laura Smith, and Pieter Abbeel. 2021. Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training. arXiv preprint arXiv:2106.05091 (2021)."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2020.610139"},{"key":"e_1_3_2_2_40_1","volume-title":"Reward Uncertainty for Exploration in Preference-based Reinforcement Learning. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=OWZVD-l-ZrC","author":"Liang Xinran","year":"2022","unstructured":"Xinran Liang, Katherine Shu, Kimin Lee, and Pieter Abbeel. 2022. Reward Uncertainty for Exploration in Preference-based Reinforcement Learning. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=OWZVD-l-ZrC"},{"key":"e_1_3_2_2_41_1","unstructured":"Runze Liu Fengshuo Bai Yali Du and Yaodong Yang. 2022. Meta-Reward-Net: Implicitly Differentiable Reward Learning for Preference-based Reinforcement Learning. In Advances in Neural Information Processing Systems Alice H. Oh Alekh Agarwal Danielle Belgrave and Kyunghyun Cho (Eds.). https:\/\/openreview.net\/forum?id=OZKBReUF-wX"},{"key":"e_1_3_2_2_42_1","volume-title":"Int. Conf. on Machine Learning. PMLR, 2285--2294","author":"MacGlashan James","year":"2017","unstructured":"James MacGlashan, Mark K Ho, Robert Loftin, Bei Peng, Guan Wang, David L Roberts, Matthew E Taylor, and Michael L Littman. 2017. Interactive learning from policy-dependent human feedback. In Int. Conf. on Machine Learning. 
PMLR, 2285--2294."},{"key":"e_1_3_2_2_43_1","volume-title":"Conference on Robot Learning. PMLR, 734--743","author":"Matas Jan","year":"2018","unstructured":"Jan Matas, Stephen James, and Andrew J Davison. 2018. Sim-to-real reinforcement learning for deformable object manipulation. In Conference on Robot Learning. PMLR, 734--743."},{"key":"e_1_3_2_2_44_1","unstructured":"Piotr Mirowski Razvan Pascanu Fabio Viola Hubert Soyer Andrew J Ballard Andrea Banino Misha Denil Ross Goroshin Laurent Sifre Koray Kavukcuoglu et al. 2016. Learning to navigate in complex environments. arXiv preprint arXiv:1611.03673 (2016)."},{"key":"e_1_3_2_2_45_1","volume-title":"Clipcap: Clip prefix for image captioning. arXiv preprint arXiv:2111.09734","author":"Mokady Ron","year":"2021","unstructured":"Ron Mokady, Amir Hertz, and Amit H Bermano. 2021. Clipcap: Clip prefix for image captioning. arXiv preprint arXiv:2111.09734 (2021)."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2021.584075"},{"key":"e_1_3_2_2_47_1","unstructured":"Jiquan Ngiam Aditya Khosla Mingyu Kim Juhan Nam Honglak Lee and Andrew Y Ng. 2011. Multimodal deep learning. In ICML."},{"key":"e_1_3_2_2_48_1","unstructured":"Jongjin Park Younggyo Seo Jinwoo Shin Honglak Lee Pieter Abbeel and Kimin Lee. 2022. SURF: Semi-supervised Reward Learning with Data Augmentation for Feedback-efficient Preference-based Reinforcement Learning. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=TfhfZLQ2EJO"},{"key":"e_1_3_2_2_49_1","volume-title":"Natural language for human-robot collaboration: Problems beyond language grounding. arXiv preprint arXiv:2110.04441","author":"Pate Seth","year":"2021","unstructured":"Seth Pate, Wei Xu, Ziyi Yang, Maxwell Love, Siddarth Ganguri, and Lawson LS Wong. 2021. Natural language for human-robot collaboration: Problems beyond language grounding. 
arXiv preprint arXiv:2110.04441 (2021)."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989249"},{"key":"e_1_3_2_2_51_1","volume-title":"International Conference on Machine Learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. PMLR, 8748--8763."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00433"},{"key":"e_1_3_2_2_53_1","volume-title":"Know what you don't know: Unanswerable questions for SQuAD. arXiv preprint arXiv:1806.03822","author":"Rajpurkar Pranav","year":"2018","unstructured":"Pranav Rajpurkar, Robin Jia, and Percy Liang. 2018. Know what you don't know: Unanswerable questions for SQuAD. arXiv preprint arXiv:1806.03822 (2018)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"crossref","unstructured":"Dorsa Sadigh Anca D Dragan Shankar Sastry and Sanjit A Seshia. 2017. Active preference-based learning of reward functions.","DOI":"10.15607\/RSS.2017.XIII.053"},{"key":"e_1_3_2_2_55_1","volume-title":"MIND MELD: Personalized Meta-Learning for Robot-Centric Imitation Learning.. In HRI. 157--165.","author":"Schrum Mariah L","year":"2022","unstructured":"Mariah L Schrum, Erin Hedlund-Botti, Nina Moorman, and Matthew C Gombolay. 2022. MIND MELD: Personalized Meta-Learning for Robot-Centric Imitation Learning.. In HRI. 157--165."},{"key":"e_1_3_2_2_56_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. 
arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_2_57_1","volume-title":"Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems. 2070--2072","author":"Schwab Devin","year":"2018","unstructured":"Devin Schwab, Yifeng Zhu, and Manuela Veloso. 2018. Zero shot transfer learning for robot soccer. In Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems. 2070--2072."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2017.03.015"},{"key":"e_1_3_2_2_59_1","volume-title":"Offline Reinforcement Learning for Visual Navigation. In 6th Annual Conference on Robot Learning. https:\/\/openreview.net\/forum?id=uhIfIEIiWm_","author":"Shah Dhruv","year":"2022","unstructured":"Dhruv Shah, Arjun Bhorkar, Hrishit Leen, Ilya Kostrikov, Nicholas Rhinehart, and Sergey Levine. 2022. Offline Reinforcement Learning for Visual Navigation. In 6th Annual Conference on Robot Learning. https:\/\/openreview.net\/forum?id=uhIfIEIiWm_"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"crossref","unstructured":"Pratyusha Sharma Balakumar Sundaralingam Valts Blukis Chris Paxton Tucker Hermans Antonio Torralba Jacob Andreas and Dieter Fox. 2022. Correcting robot plans with natural language feedback. In Robotics: Science and Systems (RSS).","DOI":"10.15607\/RSS.2022.XVIII.065"},{"key":"e_1_3_2_2_61_1","volume-title":"Loss is its own reward: Self-supervision for reinforcement learning. arXiv preprint arXiv:1612.07307","author":"Shelhamer Evan","year":"2016","unstructured":"Evan Shelhamer, Parsa Mahmoudieh, Max Argus, and Trevor Darrell. 2016. Loss is its own reward: Self-supervision for reinforcement learning. 
arXiv preprint arXiv:1612.07307 (2016)."},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00357"},{"key":"e_1_3_2_2_63_1","volume-title":"prediction, and search","author":"Spirtes Peter","unstructured":"Peter Spirtes, Clark N Glymour, Richard Scheines, and David Heckerman. 2000. Causation, prediction, and search. MIT press."},{"key":"e_1_3_2_2_64_1","unstructured":"Theodore Sumers Robert D. Hawkins Mark K Ho Thomas L. Griffiths and Dylan Hadfield-Menell. 2022. How to talk so AI will learn: Instructions descriptions and autonomy. In Advances in Neural Information Processing Systems Alice H. Oh Alekh Agarwal Danielle Belgrave and Kyunghyun Cho (Eds.). https:\/\/openreview.net\/forum?id=ZLsZmNe1RDb"},{"key":"e_1_3_2_2_65_1","volume-title":"Jamie Hall, Noam Shazeer, Apoorv Kulshreshtha, Heng-Tze Cheng, Alicia Jin, Taylor Bos, Leslie Baker, Yu Du, et al.","author":"Thoppilan Romal","year":"2022","unstructured":"Romal Thoppilan, Daniel De Freitas, Jamie Hall, Noam Shazeer, Apoorv Kulshreshtha, Heng-Tze Cheng, Alicia Jin, Taylor Bos, Leslie Baker, Yu Du, et al. 2022. Lamda: Language models for dialog applications. arXiv preprint arXiv:2201.08239 (2022)."},{"key":"e_1_3_2_2_66_1","volume-title":"Zackory Erickson, Anca D Dragan, and Daniel Brown.","author":"Tien Jeremy","year":"2022","unstructured":"Jeremy Tien, Jerry Zhi-Yang He, Zackory Erickson, Anca D Dragan, and Daniel Brown. 2022. A Study of Causal Confusion in Preference-Based Reward Learning. arXiv preprint arXiv:2204.06601 (2022)."},{"key":"e_1_3_2_2_67_1","first-page":"200","article-title":"Multimodal few-shot learning with frozen language models","volume":"34","author":"Tsimpoukelli Maria","year":"2021","unstructured":"Maria Tsimpoukelli, Jacob L Menick, Serkan Cabi, SM Eslami, Oriol Vinyals, and Felix Hill. 2021. Multimodal few-shot learning with frozen language models. Advances in Neural Information Processing Systems , Vol. 
34 (2021), 200--212.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_68_1","volume-title":"Conference on Robot Learning. PMLR, 1259--1268","author":"Wang Xiaofei","year":"2022","unstructured":"Xiaofei Wang, Kimin Lee, Kourosh Hakhamaneshi, Pieter Abbeel, and Michael Laskin. 2022. Skill preferences: Learning to extract and execute robotic skills from human feedback. In Conference on Robot Learning. PMLR, 1259--1268."},{"key":"e_1_3_2_2_69_1","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Information Processing Systems , Vol. 35 (2022), 24824--24837.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_70_1","volume-title":"A bayesian approach for policy learning from trajectory preference queries. Advances in neural information processing systems","author":"Wilson Aaron","year":"2012","unstructured":"Aaron Wilson, Alan Fern, and Prasad Tadepalli. 2012. A bayesian approach for policy learning from trajectory preference queries. Advances in neural information processing systems , Vol. 25 (2012)."},{"key":"e_1_3_2_2_71_1","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth Christian","year":"2017","unstructured":"Christian Wirth, Riad Akrour, Gerhard Neumann, Johannes F\u00fcrnkranz, et al. 2017. A survey of preference-based reinforcement learning methods. Journal of Machine Learning Research, Vol. 18, 136 (2017), 1--46.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_72_1","volume-title":"Conference on Robot Learning. 
PMLR, 40--52","author":"Xie Annie","year":"2018","unstructured":"Annie Xie, Avi Singh, Sergey Levine, and Chelsea Finn. 2018. Few-shot goal inference for visuomotor learning and planning. In Conference on Robot Learning. PMLR, 40--52."},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"e_1_3_2_2_74_1","volume-title":"International Conference on Machine Learning. PMLR, 5085--5094","author":"Ying Wei","year":"2018","unstructured":"Wei Ying, Yu Zhang, Junzhou Huang, and Qiang Yang. 2018. Transfer learning via learning to transfer. In International Conference on Machine Learning. PMLR, 5085--5094."},{"key":"e_1_3_2_2_75_1","volume-title":"Conference on Robot Learning. PMLR, 537--546","author":"Zakka Kevin","year":"2022","unstructured":"Kevin Zakka, Andy Zeng, Pete Florence, Jonathan Tompson, Jeannette Bohg, and Debidatta Dwibedi. 2022. Xirl: Cross-embodiment inverse reinforcement learning. In Conference on Robot Learning. PMLR, 537--546."},{"key":"e_1_3_2_2_76_1","volume-title":"Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language. In The Eleventh International Conference on Learning Representations.","author":"Zeng Andy","year":"2022","unstructured":"Andy Zeng, Maria Attarian, Krzysztof Marcin Choromanski, Adrian Wong, Stefan Welker, Federico Tombari, Aveek Purohit, Michael S Ryoo, Vikas Sindhwani, Johnny Lee, et al. 2022. Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_2_77_1","volume-title":"LiT: Zero-Shot Transfer with Locked-image Text Tuning. arXiv preprint arXiv:2111.07991","author":"Zhai Xiaohua","year":"2021","unstructured":"Xiaohua Zhai, Xiao Wang, Basil Mustafa, Andreas Steiner, Daniel Keysers, Alexander Kolesnikov, and Lucas Beyer. 2021. LiT: Zero-Shot Transfer with Locked-image Text Tuning. 
arXiv preprint arXiv:2111.07991 (2021)."},{"key":"e_1_3_2_2_78_1","volume-title":"6th Annual Conference on Robot Learning. https:\/\/openreview.net\/forum?id=H6rr_CGzV9y","author":"Zhang Ruohan","year":"2022","unstructured":"Ruohan Zhang, Dhruva Bansal, Yilun Hao, Ayano Hiranaka, Jialu Gao, Chen Wang, Roberto Mart\u00edn-Mart\u00edn, Li Fei-Fei, and Jiajun Wu. 2022. A Dual Representation Framework for Robot Learning with Human Guidance. In 6th Annual Conference on Robot Learning. https:\/\/openreview.net\/forum?id=H6rr_CGzV9y"}],"event":{"name":"HRI '24: ACM\/IEEE International Conference on Human-Robot Interaction","location":"Boulder CO USA","acronym":"HRI '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2024 ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610977.3634970","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3610977.3634970","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,28]],"date-time":"2025-08-28T16:36:37Z","timestamp":1756398997000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610977.3634970"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,11]]},"references-count":78,"alternative-id":["10.1145\/3610977.3634970","10.1145\/3610977"],"URL":"https:\/\/doi.org\/10.1145\/3610977.3634970","relation":{},"subject":[],"published":{"date-parts":[[2024,3,11]]},"assertion":[{"value":"2024-03-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}