{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T14:31:56Z","timestamp":1763389916904,"version":"3.32.0"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,14]]},"DOI":"10.1109\/iros58592.2024.10801540","type":"proceedings-article","created":{"date-parts":[[2024,12,25]],"date-time":"2024-12-25T19:17:39Z","timestamp":1735154259000},"page":"9254-9261","source":"Crossref","is-referenced-by-count":2,"title":["Interactive Reward Tuning: Interactive Visualization for Preference Elicitation"],"prefix":"10.1109","author":[{"given":"Danqing","family":"Shi","sequence":"first","affiliation":[{"name":"Aalto University,Finland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shibei","family":"Zhu","sequence":"additional","affiliation":[{"name":"Aalto University,Finland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tino","family":"Weinkauf","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Antti","family":"Oulasvirta","sequence":"additional","affiliation":[{"name":"Aalto University,Finland"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"2601","article-title":"Where do rewards come from?","volume-title":"Proceedings of the Annual Conference of the Cognitive Science Society","author":"Singh"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114727"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2449396.2449422"},{"key":"ref4","article-title":"Behavior alignment via reward function optimization","volume":"36","author":"Gupta","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968092"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2022.103829"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.053"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.023"},{"issue":"136","key":"ref10","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth","year":"2017","journal-title":"Journal of Machine Learning Research"},{"key":"ref11","first-page":"1142","article-title":"A bayesian approach for policy learning from trajectory preference queries","volume":"25","author":"Wilson","year":"2012","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392444"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/VL.1996.545307"},{"key":"ref14","first-page":"1255","article-title":"Modeling interaction via the principle of maximum causal entropy","volume-title":"ICML \u201910: Proceedings of the 27th International Conference on Machine Learning","author":"Ziebart"},{"article-title":"The Boltzmann policy distribution: Accounting for systematic suboptimality in human models","volume-title":"International Conference on Learning Representations","author":"Laidlaw","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33486-3_8"},{"key":"ref17","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"ICML \u201900: Proceedings of the Seventeenth International Conference on Machine Learning","author":"Ng"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5313-8"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390162"},{"issue":"1","key":"ref21","first-page":"3253","article-title":"Linear fitted-Q iteration with multiple reward functions","volume":"13","author":"Lizotte","year":"2012","journal-title":"The Journal of Machine Learning Research"},{"key":"ref22","first-page":"70","article-title":"FastDTW: Toward accurate dynamic time warping in linear time and space","volume-title":"KDD Workshop on Mining Temporal and Sequential Data","volume":"6","author":"Salvador"},{"issue":"11","key":"ref23","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der Maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICVRV.2016.33"},{"journal-title":"Dynamics-aware unsupervised discovery of skills","year":"2019","author":"Sharma","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.019"},{"key":"ref27","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"Haarnoja"},{"journal-title":"Ordered preference elicitation strategies for supporting multi-objective decision making","year":"2018","author":"Zintgraf","key":"ref28"},{"key":"ref29","first-page":"1133","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2024,10,14]]},"location":"Abu Dhabi, United Arab Emirates","end":{"date-parts":[[2024,10,18]]}},"container-title":["2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10801246\/10801290\/10801540.pdf?arnumber=10801540","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,26]],"date-time":"2024-12-26T06:57:31Z","timestamp":1735196251000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10801540\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,14]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/iros58592.2024.10801540","relation":{},"subject":[],"published":{"date-parts":[[2024,10,14]]}}}