{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T04:26:54Z","timestamp":1773376014261,"version":"3.50.1"},"reference-count":44,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001729","name":"Swedish Foundation for Strategic Research","doi-asserted-by":"publisher","award":["SSF FFL18-0199"],"award-info":[{"award-number":["SSF FFL18-0199"]}],"id":[{"id":"10.13039\/501100001729","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004063","name":"Knut and Alice Wallenberg Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004063","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10161261","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"7562-7568","source":"Crossref","is-referenced-by-count":14,"title":["Aligning Human Preferences with Baseline Objectives in Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Daniel","family":"Marta","sequence":"first","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,11428"}]},{"given":"Simon","family":"Holk","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,11428"}]},{"given":"Christian","family":"Pek","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,11428"}]},{"given":"Jana","family":"Tumova","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,11428"}]},{"given":"Iolanda","family":"Leite","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Stockholm,Sweden,11428"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461039"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460655"},{"key":"ref4","first-page":"839","article-title":"Motivating physical activity via competitive human-robot interaction","volume-title":"Conference on Robot Learning","author":"Yang"},{"key":"ref5","article-title":"Concrete problems in ai safety","author":"Amodei","year":"2016","journal-title":"arXiv preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12360"},{"key":"ref7","first-page":"2285","article-title":"Interactive learning from policy-dependent human feedback","volume-title":"Int. Conf. on Machine Learning","author":"MacGlashan"},{"key":"ref8","article-title":"Deep reinforcement learning from human preferences","volume-title":"Advances in neural information processing systems","volume":"30","author":"Christiano","year":"2017"},{"key":"ref9","article-title":"Nonverbal robot feedback for human teachers","author":"Huang","year":"2019","journal-title":"arXiv preprint"},{"key":"ref10","article-title":"Reinforcement learning and the reward engineering principle","volume-title":"2014 AAAI Spring Symposium Series","author":"Dewey","year":"2014"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-022-09552-y"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s10676-017-9440-6"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.51.4282"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202312"},{"key":"ref15","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Int. Conf. on Machine Learning","author":"Ng","year":"2000"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref17","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume-title":"AAAI","author":"Ziebart","year":"2008"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2017.xiii.053"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.048"},{"key":"ref20","first-page":"6952","article-title":"Learning a prior over intent via meta-inverse reinforcement learning","volume-title":"Int. Conf. on Machine Learning","author":"Xu","year":"2019"},{"key":"ref21","first-page":"8020","article-title":"Learning human objectives by evaluating hypothetical behavior","volume-title":"Int. Conf. on Machine Learning","author":"Reddy","year":"2020"},{"key":"ref22","volume-title":"Data-efficient visuomotor policy training using reinforcement learning and generative models","author":"Ghadirzadeh","year":"2020"},{"key":"ref23","first-page":"2479","article-title":"Bayesian robust optimization for imitation learning","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref24","first-page":"4785","article-title":"Policy gradient bayesian robust optimization for imitation learning","volume-title":"International Conference on Machine Learning","author":"Javed"},{"key":"ref25","first-page":"1259","article-title":"Skill preferences: Learning to extract and execute robotic skills from human feedback","volume-title":"Conference on Robot Learning","author":"Wang"},{"key":"ref26","article-title":"The empathic framework for task learning from implicit human feedback","author":"Cui","year":"2020","journal-title":"arXiv preprint"},{"key":"ref27","first-page":"1279","article-title":"Co-GAIL: Learning diverse strategies for human-robot collaboration","volume-title":"Conference on Robot Learning","author":"Wang"},{"key":"ref28","first-page":"513","article-title":"Learning backchanneling behaviors for a social robot via data augmentation from human-human conversations","volume-title":"Conference on Robot Learning","author":"Murray"},{"key":"ref29","first-page":"740","article-title":"My house, my rules: Learning tidying preferences with graph neural networks","volume-title":"Proceedings of the 5th Conference on Robot Learning","author":"Kapelyukh","year":"2022"},{"key":"ref30","article-title":"Repeated inverse reinforcement learning","volume-title":"Advances in neural information processing systems","volume":"30","author":"Amin","year":"2017"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-44051-0_14"},{"key":"ref32","first-page":"1211","article-title":"Learning parametric constraints in high dimensions from demonstrations","volume-title":"Conference on Robot Learning","author":"Chou"},{"issue":"136","key":"ref33","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth","year":"2017","journal-title":"Journal of Machine Learning Research"},{"key":"ref34","article-title":"Reward learning from human preferences and demonstrations in atari","volume":"31","author":"Ibarz","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.023"},{"key":"ref36","article-title":"PEBBLE: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training","author":"Lee","year":"2021","journal-title":"arXiv preprint"},{"key":"ref37","article-title":"Maximum likelihood constraint inference for inverse reinforcement learning","author":"Scobee","year":"2019","journal-title":"arXiv preprint"},{"key":"ref38","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref39","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Int. Conf. on machine learning","author":"Schulman"},{"key":"ref40","first-page":"11","article-title":"A distributional view on multi-objective policy optimization","volume-title":"International Conference on Machine Learning","author":"Abdolmaleki"},{"key":"ref41","article-title":"On multi-objective policy optimization as a tool for reinforcement learning","author":"Abdolmaleki","year":"2021","journal-title":"arXiv preprint"},{"key":"ref42","volume-title":"Unity: A general platform for intelligent agents","author":"Juliani","year":"2020"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3128237"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1098\/rspb.2009.0405"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10161261.pdf?arnumber=10161261","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T11:45:22Z","timestamp":1709293522000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10161261\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10161261","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}