{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:48:18Z","timestamp":1765547298985,"version":"3.28.0"},"reference-count":75,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610534","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"9585-9592","source":"Crossref","is-referenced-by-count":3,"title":["SEQUEL: Semi-Supervised Preference-based RL with Query Synthesis via Latent Interpolation"],"prefix":"10.1109","author":[{"given":"Daniel","family":"Marta","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"}]},{"given":"Simon","family":"Holk","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"}]},{"given":"Christian","family":"Pek","sequence":"additional","affiliation":[{"name":"TU Delft,Dept. of Cognitive Robotics,Delft,The Netherlands,2628 CD"}]},{"given":"Iolanda","family":"Leite","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning, School of Electrical Engineering and Computer Science,Stockholm,Sweden,114 28"}]}],"member":"263","reference":[{"key":"ref1","first-page":"2285","article-title":"Interactive learning from policydependent human feedback","volume-title":"Int. Conf. on Machine Learning","author":"MacGlashan"},{"article-title":"A dual representation framework for robot learning with human guidance","volume-title":"6th Annual Conference on Robot Learning","author":"Zhang","key":"ref2"},{"article-title":"Offline reinforcement learning for visual navigation","volume-title":"6th Annual Conference on Robot Learning","author":"Shah","key":"ref3"},{"key":"ref4","first-page":"40","article-title":"Few-shot goal inference for visuomotor learning and planning","volume-title":"Conference on Robot Learning","author":"Xie"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341795"},{"key":"ref6","article-title":"How to talk so AI will learn: Instructions, descriptions, and autonomy","volume-title":"Advances in Neural Information Processing Systems","author":"Sumers","year":"2022"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634970"},{"key":"ref8","first-page":"783","article-title":"Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations","volume-title":"International conference on machine learning","author":"Brown"},{"key":"ref9","first-page":"330","article-title":"Better-than-demonstrator imitation learning via automatically-ranked demonstrations","volume-title":"Conference on robot learning","author":"Brown"},{"article-title":"INQUIRE: INteractive querying for user-aware informative REasoning","volume-title":"6th Annual Conference on Robot Learning","author":"Fitzgerald","key":"ref10"},{"article-title":"Adversarial imitation learning with preferences","volume-title":"The Eleventh International Conference on Learning Representations","author":"Taranovic","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1177\/02783649211041652"},{"key":"ref13","article-title":"Reward learning from human preferences and demonstrations in atari","volume":"31","author":"Ibarz","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161261"},{"issue":"136","key":"ref15","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth","year":"2017","journal-title":"Journal of Machine Learning Research"},{"key":"ref16","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"article-title":"Deep bayesian reward learning from preferences","year":"2019","author":"Brown","key":"ref17"},{"key":"ref18","first-page":"342","article-title":"Learning multimodal rewards from rankings","volume-title":"Conference on Robot Learning","author":"Myers"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/HRI53351.2022.9889398"},{"key":"ref20","first-page":"4415","article-title":"Reward-rational (implicit) choice: A unifying formalism for reward learning","volume":"33","author":"Jeon","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref21","first-page":"429","article-title":"Less is more: Rethinking probabilistic models of human behavior","volume-title":"Proceedings of the 2020 acm\/IEEE international conference on humanrobot interaction","author":"Bobu"},{"article-title":"B-pref: Benchmarking preference-based reinforcement learning","year":"2021","author":"Lee","key":"ref22"},{"key":"ref23","first-page":"2014","article-title":"Few-shot preference learning for human-in-the-loop rl","volume-title":"Conference on Robot Learning","author":"Hejna III"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-72816-0_16456"},{"key":"ref25","article-title":"Inverse reward design","volume":"30","author":"Hadfield-Menell","year":"2017","journal-title":"Advances in neural information processing systems"},{"article-title":"Concrete problems in ai safety","year":"2016","author":"Amodei","key":"ref26"},{"volume-title":"Causation, prediction, and search","year":"2000","author":"Spirtes","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s41060-016-0038-6"},{"key":"ref29","article-title":"Causal confusion in imitation learning","volume":"32","author":"De Haan","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"author":"Tien","key":"ref30","article-title":"A study of causal confusion in preference-based reward learning"},{"article-title":"Causal confusion and reward misidentification in preferencebased reward learning","volume-title":"The Eleventh International Conference on Learning Representations","author":"Tien","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3201576"},{"key":"ref33","article-title":"Recurrent world models facilitate policy evolution","volume":"31","author":"Ha","year":"2018","journal-title":"Advances in neural information processing systems"},{"article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"International Conference on Learning Representations","author":"Hafner","key":"ref34"},{"key":"ref35","first-page":"8583","article-title":"Planning to explore via self-supervised world models","volume-title":"International Conference on Machine Learning","author":"Sekar"},{"article-title":"Understanding and improving interpolation in autoencoders via an adversarial regularizer","year":"2018","author":"Berthelot","key":"ref36"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/k16-1002"},{"article-title":"A neural representation of sketch drawings","year":"2017","author":"Ha","key":"ref38"},{"article-title":"Adversarially learned inference","year":"2016","author":"Dumoulin","key":"ref39"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-23780-5_11"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33486-3_8"},{"key":"ref42","first-page":"1503","article-title":"Programming by feedback","volume-title":"International Conference on Machine Learning","author":"Akrour"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6094735"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2017.xiii.053"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.041"},{"key":"ref46","first-page":"1259","article-title":"Skill preferences: Learning to extract and execute robotic skills from human feedback","volume-title":"Conference on Robot Learning","author":"Wang"},{"article-title":"Reward uncertainty for exploration in preference-based reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Liang","key":"ref47"},{"article-title":"SURF: Semi-supervised reward learning with data augmentation for feedbackefficient preference-based reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Park","key":"ref48"},{"key":"ref49","article-title":"Explaining preferences with shapley values","volume-title":"Advances in Neural Information Processing Systems","author":"Hu","year":"2022"},{"key":"ref50","article-title":"Meta-reward-net: Implicitly differentiable reward learning for preference-based reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","author":"Liu","year":"2022"},{"key":"ref51","article-title":"Benchmarks and algorithms for offline preference-based reward learning","author":"Shin","year":"2022","journal-title":"Transactions on Machine Learning Research"},{"key":"ref52","article-title":"Semi-supervised learning by entropy minimization","volume":"17","author":"Grandvalet","year":"2004","journal-title":"Advances in neural information processing systems"},{"key":"ref53","first-page":"596","article-title":"Fixmatch: Simplifying semisupervised learning with consistency and confidence","volume":"33","author":"Sohn","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref54","first-page":"896","article-title":"Pseudo-label: The simple and efficient semisupervised learning method for deep neural networks","volume-title":"Workshop on challenges in representation learning, ICML","volume":"3","author":"Lee"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1056\/NEJMoa1210384"},{"key":"ref56","first-page":"6256","article-title":"Unsupervised data augmentation for consistency training","volume":"33","author":"Xie","year":"2020","journal-title":"Advances in neural information processing systems"},{"article-title":"Image augmentation is all you need: Regularizing deep reinforcement learning from pixels","volume-title":"International Conference on Learning Representations","author":"Yarats","key":"ref57"},{"key":"ref58","first-page":"19 884","article-title":"Reinforcement learning with augmented data","volume":"33","author":"Laskin","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref59","first-page":"5639","article-title":"Curl: Contrastive unsupervised representations for reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Laskin"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561333"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00433"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5763"},{"key":"ref64","article-title":"A bayesian approach for policy learning from trajectory preference queries","volume":"25","author":"Wilson","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"},{"article-title":"Remixmatch: Semi-supervised learning with distribution matching and augmentation anchoring","volume-title":"International Conference on Learning Representations","author":"Berthelot","key":"ref66"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref67"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1561\/2200000056"},{"article-title":"Openai gym","year":"2016","author":"Brockman","key":"ref69"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpa.2020.100022"},{"key":"ref71","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Conference on robot learning","author":"Yu"},{"key":"ref72","first-page":"1861","article-title":"Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proceedings of the 35th International Conference on machine learning (ICML-18)","author":"Haarnoja"},{"article-title":"Adam: a method for stochastic optimization 3rd int","volume-title":"Conf. for Learning Representations, San","author":"Kingma","key":"ref73"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3128237"},{"article-title":"Unity: A general platform for intelligent agents","year":"2020","author":"Juliani","key":"ref75"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610534.pdf?arnumber=10610534","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:50:51Z","timestamp":1723269051000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610534\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":75,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610534","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}