{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T18:06:23Z","timestamp":1776881183981,"version":"3.51.2"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10611095","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"15039-15046","source":"Crossref","is-referenced-by-count":2,"title":["Bayesian Constraint Inference from User Demonstrations Based on Margin-Respecting Preference Models"],"prefix":"10.1109","author":[{"given":"Dimitris","family":"Papadimitriou","sequence":"first","affiliation":[{"name":"University of California at Berkeley,Department of Mechanical Engineering"}]},{"given":"Daniel S.","family":"Brown","sequence":"additional","affiliation":[{"name":"University of Utah,School of Computing"}]}],"member":"263","reference":[{"key":"ref1","first-page":"22","article-title":"Constrained policy optimization","volume-title":"International conference on machine learning","author":"Achiam"},{"key":"ref2","first-page":"15 666","article-title":"A simple reward-free approach to constrained reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Miryoosefi"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103500"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref5","first-page":"2","article-title":"Algorithms for inverse reinforcement learning","volume":"1","author":"Ng","year":"2000","journal-title":"Icml"},{"key":"ref6","author":"Wulfmeier","year":"2015","journal-title":"Maximum entropy deep inverse reinforcement learning"},{"key":"ref7","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume-title":"Aaai","volume":"8","author":"Ziebart","year":"2008"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04174-7_3"},{"key":"ref9","first-page":"2586","article-title":"Bayesian inverse reinforcement learning","volume-title":"IJCAI","volume":"7","author":"Ramachandran"},{"key":"ref10","first-page":"783","article-title":"Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations","volume-title":"International conference on machine learning","author":"Brown"},{"key":"ref11","article-title":"Inverse reinforcement learning through structured classification","volume":"25","author":"Klein","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref12","author":"Scobee","year":"2019","journal-title":"Maximum likelihood constraint inference for inverse reinforcement learning"},{"key":"ref13","article-title":"Bayesian methods for constraint inference in reinforcement learning","author":"Papadimitriou","journal-title":"Transactions on Machine Learning Research"},{"key":"ref14","first-page":"7390","article-title":"Inverse constrained reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Malik"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.713"},{"key":"ref16","author":"Liu","year":"2016","journal-title":"Large-margin soft-max loss for convolutional neural networks"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2018.2822810"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974427"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10383197"},{"key":"ref20","author":"Gaurav","year":"2022","journal-title":"Learning soft constraints from constrained expert demonstrations"},{"key":"ref21","author":"Baert","year":"2023","journal-title":"Maximum causal entropy inverse constrained reinforcement learning"},{"key":"ref22","author":"Kim","year":"2023","journal-title":"Learning shared safety constraints from multi-task demonstrations"},{"key":"ref23","author":"Lindner","year":"2023","journal-title":"Learning safety constraints from demonstrations with unknown rewards"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341801"},{"issue":"136","key":"ref25","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth","year":"2017","journal-title":"Journal of Machine Learning Research"},{"key":"ref26","article-title":"B-pref: Bench-marking preference-based reinforcement learning","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)","author":"Lee"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-23780-5_11"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"},{"key":"ref29","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref30","first-page":"1165","article-title":"Safe imitation learning via fast bayesian reward inference from preferences","volume-title":"International Conference on Machine Learning","author":"Brown"},{"key":"ref31","author":"Lee","year":"2021","journal-title":"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3568162.3576989"},{"key":"ref33","article-title":"Direct preference optimization: Your language model is secretly a reward model","volume":"36","author":"Rafailov","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref34","first-page":"342","article-title":"Learning multi-modal rewards from rankings","volume-title":"Conference on Robot Learning","author":"Myers"},{"key":"ref35","author":"Shin","year":"2023","journal-title":"Benchmarks and algorithms for offline preference-based reward learning"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161081"},{"key":"ref37","author":"Wilde","year":"2021","journal-title":"Learning reward functions from scale feedback"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2005.1470372"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s10589-016-9847-8"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/168"},{"key":"ref41","author":"Teso","year":"2016","journal-title":"Constructive preference elicitation by setwise max-margin learning"},{"key":"ref42","first-page":"1254","article-title":"Non-gaussian discriminative factor models via the max-margin rank-likelihood","volume-title":"International Conference on Machine Learning","author":"Yuan"},{"key":"ref43","article-title":"Gymnasium robotics","author":"de Lazcano","year":"2023"},{"key":"ref44","author":"Haarnoja","year":"2018","journal-title":"Soft actor-critic algorithms and applications"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref46","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"International conference on machine learning","author":"Chen"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3583131.3590443"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10611095.pdf?arnumber=10611095","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,11]],"date-time":"2024-08-11T04:11:21Z","timestamp":1723349481000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10611095\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10611095","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}