{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T18:19:12Z","timestamp":1773771552105,"version":"3.50.1"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004270","name":"KTH","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004270","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001729","name":"Swedish Foundation for Strategic Research","doi-asserted-by":"publisher","award":["SSF FFL18-0199"],"award-info":[{"award-number":["SSF FFL18-0199"]}],"id":[{"id":"10.13039\/501100001729","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004063","name":"Knut and Alice Wallenberg Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004063","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,1]]},"DOI":"10.1109\/iros55552.2023.10341795","type":"proceedings-article","created":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T14:17:55Z","timestamp":1702477075000},"page":"7878-7885","source":"Crossref","is-referenced-by-count":12,"title":["VARIQuery: VAE Segment-Based Active Learning for Query Selection in Preference-Based Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Daniel","family":"Marta","sequence":"first","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,114 28"}]},{"given":"Simon","family":"Holk","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,114 28"}]},{"given":"Christian","family":"Pek","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,114 28"}]},{"given":"Jana","family":"Tumova","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,114 28"}]},{"given":"Iolanda","family":"Leite","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,114 28"}]}],"member":"263","reference":[{"key":"ref1","first-page":"2285","article-title":"Interactive learning from policy-dependent human feedback","volume-title":"Int. Conf. on Machine Learning","author":"MacGlashan"},{"key":"ref2","article-title":"Nonverbal robot feedback for human teachers","author":"Huang","year":"2019","journal-title":"arXiv preprint"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33486-3_8"},{"issue":"136","key":"ref4","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth","year":"2017","journal-title":"Journal of Machine Learning Research"},{"key":"ref5","article-title":"Deep reinforcement learning from human preferences","volume-title":"Advances in neural information processing systems","volume":"30","author":"Christiano","year":"2017"},{"key":"ref6","article-title":"Reward learning from human preferences and demonstrations in atari","volume-title":"Advances in neural information processing systems","volume":"31","author":"Ibarz","year":"2018"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2010.12.054"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1561\/2200000056"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3472291"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2018.2874225"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206627"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/130385.130417"},{"key":"ref13","article-title":"Multiple-instance active learning","volume-title":"Advances in neural information processing systems","volume":"20","author":"Settles","year":"2007"},{"key":"ref14","first-page":"441","article-title":"Toward optimal active learning through monte carlo estimation of error reduction","volume-title":"ICML","volume":"2","author":"Roy"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10593-2_37"},{"key":"ref16","first-page":"9","article-title":"Link-based active learning","volume-title":"NIPS Workshop on Analyzing Networks and Learning with Graphs","volume":"4","author":"Bilgic","year":"2009"},{"key":"ref17","article-title":"Active instance sampling via matrix partition","volume-title":"Advances in Neural Information Processing Systems","volume":"23","author":"Guo","year":"2010"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015349"},{"key":"ref19","first-page":"1308","article-title":"Deep active learning: Unified and principled method for query and training","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Shui"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2017.67"},{"key":"ref21","article-title":"Diverse mini-batch active learning","author":"Zhdanov","year":"2019","journal-title":"arXiv preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00236"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8297020"},{"key":"ref24","article-title":"Adversarial active learning for deep networks: a margin based approach","author":"Ducoffe","year":"2018","journal-title":"arXiv preprint"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00607"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00807"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00018"},{"key":"ref28","article-title":"Ranking cgans: Subjective control over semantic image attributes","volume-title":"British Machine Vision Conference","author":"Saquil","year":"2018"},{"key":"ref29","article-title":"Few-shot preference learning for human-in-the-loop rl","author":"Hejna","year":"2022","journal-title":"arXiv preprint"},{"key":"ref30","article-title":"A bayesian approach for policy learning from trajectory preference queries","volume-title":"Advances in neural information processing systems","volume":"25","author":"Wilson","year":"2012"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-23780-5_11"},{"issue":"32","key":"ref32","first-page":"1503","article-title":"Programming by feedback","volume-title":"International Conference on Machine Learning","author":"Akrour"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5313-8"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1177\/0278364915581193"},{"key":"ref35","article-title":"Active comparison based learning incorporating user uncertainty and noise","volume-title":"RSS Workshop on Model Learning for Human-Robot Communication","author":"Holladay","year":"2016"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2017.xiii.053"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.041"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1177\/02783649211041652"},{"key":"ref39","article-title":"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training","author":"Lee","year":"2021","journal-title":"arXiv preprint"},{"key":"ref40","first-page":"18 459","article-title":"Behavior from the void: Unsupervised active pre-training","volume-title":"Advances in Neural Information Processing Systems","volume":"34","author":"Liu","year":"2021"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-44816-0_31"},{"key":"ref42","article-title":"Optimal bayesian recommendation sets and myopically optimal choice query sets","volume-title":"Advances in neural information processing systems","volume":"23","author":"Viappiani","year":"2010"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-49257-7_15"},{"key":"ref44","article-title":"Openai gym","author":"Brockman","year":"2016","journal-title":"arXiv preprint"},{"key":"ref45","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3128237"}],"event":{"name":"2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Detroit, MI, USA","start":{"date-parts":[[2023,10,1]]},"end":{"date-parts":[[2023,10,5]]}},"container-title":["2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10341341\/10341342\/10341795.pdf?arnumber=10341795","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,19]],"date-time":"2023-12-19T19:14:41Z","timestamp":1703013281000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10341795\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,1]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/iros55552.2023.10341795","relation":{},"subject":[],"published":{"date-parts":[[2023,10,1]]}}}