{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T11:20:34Z","timestamp":1773141634962,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,3,13]],"date-time":"2023-03-13T00:00:00Z","timestamp":1678665600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"ONR Young Investigator Award"},{"name":"Apple AI\/ML Fellowship"},{"name":"Weill Neurohub"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,3,13]]},"DOI":"10.1145\/3568162.3576989","type":"proceedings-article","created":{"date-parts":[[2023,3,9]],"date-time":"2023-03-09T18:08:48Z","timestamp":1678385328000},"page":"565-574","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["SIRL"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9507-7427","authenticated-orcid":false,"given":"Andreea","family":"Bobu","sequence":"first","affiliation":[{"name":"University of California Berkeley, Berkeley, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1105-0033","authenticated-orcid":false,"given":"Yi","family":"Liu","sequence":"additional","affiliation":[{"name":"University of California Berkeley, Berkeley, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0656-2800","authenticated-orcid":false,"given":"Rohin","family":"Shah","sequence":"additional","affiliation":[{"name":"DeepMind Research, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9570-1832","authenticated-orcid":false,"given":"Daniel S.","family":"Brown","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6312-5466","authenticated-orcid":false,"given":"Anca D.","family":"Dragan","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,3,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"International Conference on. ACM.","author":"Abbeel Pieter","year":"2004","unstructured":"Pieter Abbeel and Andrew Y Ng. 2004. Apprenticeship learning via inverse reinforcement learning. In Machine Learning (ICML), International Conference on. ACM."},{"key":"e_1_3_2_2_2_1","unstructured":"Sameer Agarwal Josh Wills Lawrence Cayton Gert Lanckriet David Kriegman and Serge Belongie. 2007. Generalized non-metric multidimensional scaling. In Artificial Intelligence and Statistics. PMLR 11--18."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--319--23528--8_14"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/3327144.3327216"},{"key":"e_1_3_2_2_5_1","unstructured":"Monica Babes Vukosi N Marivate Kaushik Subramanian and Michael L Littman. 2011. Apprenticeship learning about multiple intentions. In ICML."},{"key":"e_1_3_2_2_6_1","volume-title":"Proceedings of the 1st Annual Conference on Robot Learning (Proceedings of Machine Learning Research","volume":"226","author":"Bajcsy Andrea","unstructured":"Andrea Bajcsy, Dylan P. Losey, Marcia K. O'Malley, and Anca D. Dragan. 2017. Learning Robot Objectives from Physical Human Interaction. In Proceedings of the 1st Annual Conference on Robot Learning (Proceedings of Machine Learning Research, Vol. 78), Sergey Levine, Vincent Vanhoucke, and Ken Goldberg (Eds.). PMLR, 217--226. http:\/\/proceedings.mlr.press\/v78\/bajcsy17a.html"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","unstructured":"Vassileios Balntas Edgar Riba Daniel Ponsa and Krystian Mikolajczyk. 2016. Learning local feature descriptors with triplets and shallow convolutional neural networks. 119.1--119.11. https:\/\/doi.org\/10.5244\/C.30.119","DOI":"10.5244\/C.30.119"},{"key":"e_1_3_2_2_8_1","volume-title":"Conference on robot learning. PMLR, 519--528","author":"Biyik Erdem","year":"2018","unstructured":"Erdem Biyik and Dorsa Sadigh. 2018. Batch active preference-based learning of reward functions. In Conference on robot learning. PMLR, 519--528."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3196164"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","unstructured":"Andreea Bobu Marius Wiggert Claire Tomlin and Anca D. Dragan. 0. Inducing Structure in Reward Learning by Learning Features. The International Journal of Robotics Research 0 0 (0) 02783649221078031. https:\/\/doi.org\/10.1177\/ 02783649221078031 arXiv:https:\/\/doi.org\/10.1177\/02783649221078031","DOI":"10.1177\/02783649221078031"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3434073.3444667"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"},{"key":"e_1_3_2_2_13_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning (Proceedings of Machine Learning Research","author":"Brown Daniel","year":"2020","unstructured":"Daniel Brown, Russell Coleman, Ravi Srinivasan, and Scott Niekum. 2020. Safe Imitation Learning via Fast Bayesian Reward Inference from Preferences. In Proceedings of the 37th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 119), Hal Daum\u00e9 III and Aarti Singh (Eds.). PMLR, 1165--1177. http:\/\/proceedings.mlr.press\/v119\/brown20a.html"},{"key":"e_1_3_2_2_14_1","volume-title":"International Conference on Machine Learning. PMLR, 783-- 792","author":"Brown Daniel","year":"2019","unstructured":"Daniel Brown, Wonjoon Goo, Prabhat Nagarajan, and Scott Niekum. 2019. Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations. In International Conference on Machine Learning. PMLR, 783-- 792."},{"key":"e_1_3_2_2_15_1","volume-title":"2018 IEEE International Conference on Robotics and Automation, ICRA 2018","author":"Bullard Kalesha","year":"2018","unstructured":"Kalesha Bullard, Sonia Chernova, and Andrea Lockerd Thomaz. 2018. HumanDriven Feature Selection for a Robotic Agent Learning Classification Tasks from Demonstration. In 2018 IEEE International Conference on Robotics and Automation, ICRA 2018, Brisbane, Australia, May 21--25, 2018. IEEE, 6923--6930. https:\/\/doi. org\/10.1109\/ICRA.2018.8461012"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2157689.2157693"},{"key":"e_1_3_2_2_17_1","volume-title":"Learning Generalizable Robotic Reward Functions from \"In-The-Wild\" Human Videos. CoRR abs\/2103.16817","author":"Chen Annie S.","year":"2021","unstructured":"Annie S. Chen, Suraj Nair, and Chelsea Finn. 2021. Learning Generalizable Robotic Reward Functions from \"In-The-Wild\" Human Videos. CoRR abs\/2103.16817 (2021). arXiv:2103.16817 https:\/\/arxiv.org\/abs\/2103.16817"},{"key":"e_1_3_2_2_18_1","volume-title":"Garnett (Eds.)","volume":"31","author":"Chen Ricky T. Q.","year":"2018","unstructured":"Ricky T. Q. Chen, Xuechen Li, Roger B Grosse, and David K Duvenaud. 2018. Isolating Sources of Disentanglement in Variational Autoencoders. In Advances in Neural Information Processing Systems, S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, and R. Garnett (Eds.), Vol. 31. Curran Associates, Inc."},{"key":"e_1_3_2_2_19_1","volume-title":"International conference on machine learning. PMLR, 1597--1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PMLR, 1597--1607."},{"key":"e_1_3_2_2_20_1","volume-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems","author":"Chen Xi","year":"2016","unstructured":"Xi Chen, Yan Duan, Rein Houthooft, John Schulman, Ilya Sutskever, and Pieter Abbeel. 2016. InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets. In Proceedings of the 30th International Conference on Neural Information Processing Systems (Barcelona, Spain) (NIPS'16). Curran Associates Inc., Red Hook, NY, USA, 2180--2188."},{"key":"e_1_3_2_2_21_1","volume-title":"Nonparametric Bayesian inverse reinforcement learning for multiple reward functions. Advances in Neural Information Processing Systems 25","author":"Choi Jaedeug","year":"2012","unstructured":"Jaedeug Choi and Kee-Eung Kim. 2012. Nonparametric Bayesian inverse reinforcement learning for multiple reward functions. Advances in Neural Information Processing Systems 25 (2012)."},{"key":"e_1_3_2_2_22_1","volume-title":"Garnett (Eds.)","volume":"30","author":"Christiano Paul F","year":"2017","unstructured":"Paul F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep Reinforcement Learning from Human Preferences. In Advances in Neural Information Processing Systems, I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/ d5e2c0adad503c91f91df240d0cd4e49-Paper.pdf"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"Adam Coates and A. Ng. 2012. Learning Feature Representations with K-Means. In Neural Networks: Tricks of the Trade.","DOI":"10.1007\/978-3-642-35289-8_30"},{"key":"e_1_3_2_2_24_1","unstructured":"Erwin Coumans and Yunfei Bai. 2016--2019. PyBullet a Python module for physics simulation for games robotics and machine learning. http:\/\/pybullet.org."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2014.2346978"},{"key":"e_1_3_2_2_26_1","volume-title":"European workshop on reinforcement learning. Springer, 273--284","author":"Dimitrakakis Christos","year":"2011","unstructured":"Christos Dimitrakakis and Constantin A Rothkopf. 2011. Bayesian multitask inverse reinforcement learning. In European workshop on reinforcement learning. Springer, 273--284."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.167"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139510"},{"key":"e_1_3_2_2_29_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning -","volume":"70","author":"Finn Chelsea","year":"2017","unstructured":"Chelsea Finn, Pieter Abbeel, and Sergey Levine. 2017. Model-Agnostic MetaLearning for Fast Adaptation of Deep Networks. In Proceedings of the 34th International Conference on Machine Learning - Volume 70 (Sydney, NSW, Australia) (ICML'17). JMLR.org, 1126--1135."},{"key":"e_1_3_2_2_30_1","volume-title":"Multi-task maximum entropy inverse reinforcement learning. arXiv preprint arXiv:1805.08882","author":"Gleave Adam","year":"2018","unstructured":"Adam Gleave and Oliver Habryka. 2018. Multi-task maximum entropy inverse reinforcement learning. arXiv preprint arXiv:1805.08882 (2018)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2014.6943191"},{"key":"e_1_3_2_2_32_1","unstructured":"Irina Higgins Lo\u00efc Matthey Arka Pal Christopher P. Burgess Xavier Glorot Matthew M. Botvinick Shakir Mohamed and Alexander Lerchner. 2017. betaVAE: Learning Basic Visual Concepts with a Constrained Variational Framework. In ICLR."},{"key":"e_1_3_2_2_33_1","volume-title":"Meta Preference Learning for Fast User Adaptation in Human-Supervisory Multi-Robot Deployments. In 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE, 5851--5856","author":"Huang Chao","year":"2021","unstructured":"Chao Huang, Wenhao Luo, and Rui Liu. 2021. Meta Preference Learning for Fast User Adaptation in Human-Supervisory Multi-Robot Deployments. In 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE, 5851--5856."},{"key":"e_1_3_2_2_34_1","first-page":"4415","article-title":"Reward-rational (implicit) choice: A unifying formalism for reward learning","volume":"33","author":"Jeon Hong Jun","year":"2020","unstructured":"Hong Jun Jeon, Smitha Milli, and Anca Dragan. 2020. Reward-rational (implicit) choice: A unifying formalism for reward learning. Advances in Neural Information Processing Systems 33 (2020), 4415--4426.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_35_1","unstructured":"Maurice George Kendall. 1948. Rank correlation methods. (1948)."},{"key":"e_1_3_2_2_36_1","volume-title":"Kingma and Max Welling","author":"Diederik","year":"2014","unstructured":"Diederik P. Kingma and Max Welling. 2014. Auto-Encoding Variational Bayes. In 2nd International Conference on Learning Representations, ICLR 2014, Banff, AB, Canada, April 14--16, 2014, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1312.6114"},{"key":"e_1_3_2_2_37_1","volume-title":"IJCAI 2013, Proceedings of the 23rd International Joint Conference on Artificial Intelligence","author":"Kulick Johannes","year":"2013","unstructured":"Johannes Kulick, Marc Toussaint, Tobias Lang, and Manuel Lopes. 2013. Active Learning for Teaching a Robot Grounded Relational Symbols. In IJCAI 2013, Proceedings of the 23rd International Joint Conference on Artificial Intelligence, Beijing, China, August 3--9, 2013, Francesca Rossi (Ed.). IJCAI\/AAAI, 1451--1457. http:\/\/www.aaai.org\/ocs\/index.php\/IJCAI\/IJCAI13\/paper\/view\/6706"},{"key":"e_1_3_2_2_38_1","volume-title":"Contrastive Predictive Coding Based Feature for Automatic Speaker Verification. arXiv preprint arXiv:1904.01575","author":"Lai I","year":"2019","unstructured":"Cheng-I Lai. 2019. Contrastive Predictive Coding Based Feature for Automatic Speaker Verification. arXiv preprint arXiv:1904.01575 (2019)."},{"key":"e_1_3_2_2_39_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning (Proceedings of Machine Learning Research","author":"Laskin Michael","year":"2020","unstructured":"Michael Laskin, Aravind Srinivas, and Pieter Abbeel. 2020. CURL: Contrastive Unsupervised Representations for Reinforcement Learning. In Proceedings of the 37th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 119), Hal Daum\u00e9 III and Aarti Singh (Eds.). PMLR, 5639-- 5650. https:\/\/proceedings.mlr.press\/v119\/laskin20a.html"},{"key":"e_1_3_2_2_40_1","volume-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560840"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICARM.2019.8833723"},{"key":"e_1_3_2_2_43_1","volume-title":"March 13--16","author":"Mandi Zhao","year":"2023","unstructured":"Zhao Mandi, Pieter Abbeel, and Stephen James. 2022. On the Effectiveness of Fine-tuning Versus Meta-reinforcement Learning. arXiv preprint arXiv:2206.03271 (2022) HRI '23, March 13--16, 2023, Stockholm, Sweden Andreea Bobu, Yi Liu, Rohin Shah, Daniel S. Brown, & Anca D. Dragan"},{"key":"e_1_3_2_2_44_1","article-title":"Learning Multi-modal Similarity","volume":"12","author":"McFee Brian","year":"2011","unstructured":"Brian McFee, Gert Lanckriet, and Tony Jebara. 2011. Learning Multi-modal Similarity. Journal of machine learning research 12, 2 (2011).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/1957656.1957786"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197126"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1561\/2300000053"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00278"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00112"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"crossref","unstructured":"Dorsa Sadigh Anca D Dragan Shankar Sastry and Sanjit A Seshia. 2017. Active preference-based learning of reward functions. In Robotics: Science and systems.","DOI":"10.15607\/RSS.2017.XIII.053"},{"key":"e_1_3_2_2_51_1","volume-title":"Shixiang Shane Gu, and Richard Zemel","author":"Seyed Ghasemipour Seyed Kamyar","year":"2019","unstructured":"Seyed Kamyar Seyed Ghasemipour, Shixiang Shane Gu, and Richard Zemel. 2019. Smile: Scalable meta inverse reinforcement learning through context-conditional policies. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2203.02091"},{"key":"e_1_3_2_2_53_1","volume-title":"Gordon DA Brown, and Nick Chater","author":"Stewart Neil","year":"2005","unstructured":"Neil Stewart, Gordon DA Brown, and Nick Chater. 2005. Absolute identification by relative judgment. Psychological review 112, 4 (2005), 881."},{"key":"e_1_3_2_2_54_1","volume-title":"Adaptively learning the crowd kernel. arXiv preprint arXiv:1105.1033","author":"Tamuz Omer","year":"2011","unstructured":"Omer Tamuz, Ce Liu, Serge Belongie, Ohad Shamir, and Adam Tauman Kalai. 2011. Adaptively learning the crowd kernel. arXiv preprint arXiv:1105.1033 (2011)."},{"key":"e_1_3_2_2_55_1","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth Christian","year":"2017","unstructured":"Christian Wirth, Riad Akrour, Gerhard Neumann, Johannes F\u00fcrnkranz, et al. 2017. A survey of preference-based reinforcement learning methods. Journal of Machine Learning Research 18, 136 (2017), 1--46.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_56_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"6962","author":"Xu Kelvin","year":"2019","unstructured":"Kelvin Xu, Ellis Ratner, Anca Dragan, Sergey Levine, and Chelsea Finn. 2019. Learning a Prior over Intent via Meta-Inverse Reinforcement Learning. In Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 97), Kamalika Chaudhuri and Ruslan Salakhutdinov (Eds.). PMLR, 6952--6962. https:\/\/proceedings.mlr.press\/v97\/xu19d.html"},{"key":"e_1_3_2_2_57_1","volume-title":"International Conference on Machine Learning. PMLR, 6952--6962","author":"Xu Kelvin","year":"2019","unstructured":"Kelvin Xu, Ellis Ratner, Anca Dragan, Sergey Levine, and Chelsea Finn. 2019. Learning a prior over intent via meta-inverse reinforcement learning. In International Conference on Machine Learning. PMLR, 6952--6962."},{"key":"e_1_3_2_2_58_1","unstructured":"Lantao Yu Tianhe Yu Chelsea Finn and Stefano Ermon. 2019. Meta-inverse reinforcement learning with probabilistic context variables. Advances in Neural Information Processing Systems 32 (2019)"}],"event":{"name":"HRI '23: ACM\/IEEE International Conference on Human-Robot Interaction","location":"Stockholm Sweden","acronym":"HRI '23","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2023 ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3568162.3576989","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3568162.3576989","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T21:26:16Z","timestamp":1750281976000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3568162.3576989"}},"subtitle":["Similarity-based Implicit Representation Learning"],"short-title":[],"issued":{"date-parts":[[2023,3,13]]},"references-count":58,"alternative-id":["10.1145\/3568162.3576989","10.1145\/3568162"],"URL":"https:\/\/doi.org\/10.1145\/3568162.3576989","relation":{},"subject":[],"published":{"date-parts":[[2023,3,13]]},"assertion":[{"value":"2023-03-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}