{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T15:25:10Z","timestamp":1730301910838,"version":"3.28.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T00:00:00Z","timestamp":1719187200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T00:00:00Z","timestamp":1719187200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,24]]},"DOI":"10.1109\/ur61395.2024.10597501","type":"proceedings-article","created":{"date-parts":[[2024,7,26]],"date-time":"2024-07-26T17:23:58Z","timestamp":1722014638000},"page":"220-226","source":"Crossref","is-referenced-by-count":1,"title":["Integrating Human Expertise in Continuous Spaces: A Novel Interactive Bayesian Optimization Framework with Preference Expected Improvement"],"prefix":"10.1109","author":[{"given":"Nikolaus","family":"Feith","sequence":"first","affiliation":[{"name":"Montanuniversität,Chair of Cyber-Physical Systems,Leoben,Austria,8700"}]},{"given":"Elmar","family":"Rueckert","sequence":"additional","affiliation":[{"name":"Montanuniversität,Chair of Cyber-Physical Systems,Leoben,Austria,8700"}]}],"member":"263","reference":[{"issue":"1","key":"ref1","first-page":"4945","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth","year":"2017","journal-title":"The Journal of Machine Learning Research"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33486-3_8"},{"key":"ref3","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"article-title":"Preference Exploration for Efficient Bayesian Optimization with Multiple Outcomes","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Lin","key":"ref4"},{"key":"ref5","article-title":"On Preference Learning Based on Sequential Bayesian Optimization with Pairwise Comparison","author":"Ignatenko","year":"2022","journal-title":"(ArXiv)"},{"key":"ref6","article-title":"Interactive Machine Learning: A State of the Art Review","author":"Wondimu","year":"2022","journal-title":"(ArXiv)"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3357236.3395525"},{"key":"ref8","first-page":"2625","article-title":"Policy Shaping: Integrating Human Feedback with Reinforcement Learning","volume-title":"Proc. 26th International Conference on Neural Information Processing Systems","volume":"2","author":"Griffith"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1597735.1597738"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/iros47612.2022.9982282"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-012-0412-6"},{"key":"ref12","article-title":"Reinforcement learning from simultaneous human and MDP reward","volume-title":"AAMAS","volume":"1004","author":"Knox","year":"2012"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11485"},{"key":"ref14","article-title":"DQN-TAMER: Human-in-the-Loop Reinforcement Learning with Intractable Feedback","author":"Arakawa","year":"2018","journal-title":"(ArXiv)"},{"key":"ref15","first-page":"2285","article-title":"Interactive Learning from Policy-Dependent Human Feedback","volume-title":"Proc. 34th International Conference on Machine Learning","author":"MacGlashan"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-33950-0_31"},{"key":"ref17","article-title":"Preference-based reinforcement learning: A preliminary survey","volume-title":"Proceedings of the ECML\/PKDD-13 Workshop on Reinforcement Learning from Generalized Feedback: Beyond Numeric Rewards","author":"Wirth","year":"2013"},{"key":"ref18","article-title":"A bayesian approach for policy learning from trajectory preference queries","volume":"25","author":"Wilson","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10269"},{"article-title":"PEBBLE: Feedback-Efficient Interactive Reinforcement Learning via Relabeling Experience and Unsupervised Pre-training","volume-title":"Proc. 2021 International Conference on Machine Learning","author":"Lee","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10161081"},{"article-title":"Few-shot preference learning for human-in-the-loop rl","volume-title":"Conference on Robot Learning","author":"Hejna","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161261"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-024-06543-w"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.3389\/fncom.2012.00097"},{"key":"ref26","article-title":"A Tutorial on Bayesian Optimization","author":"Frazier","year":"2018","journal-title":"(ArXiv)"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1017\/9781108348973"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IISA52424.2021.9555522"},{"journal-title":"OpenAI Gym","year":"2016","author":"Brockman","key":"ref29"},{"journal-title":"DeepMind Control Suite","year":"2018","author":"Tassa","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/4-431-31381-8_23"},{"key":"ref32","article-title":"Probabilistic movement primitive","volume":"26","author":"Paraschos","year":"2013","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2024 21st International Conference on Ubiquitous Robots (UR)","start":{"date-parts":[[2024,6,24]]},"location":"New York, NY, USA","end":{"date-parts":[[2024,6,27]]}},"container-title":["2024 21st International Conference on Ubiquitous Robots (UR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10597436\/10597437\/10597501.pdf?arnumber=10597501","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,27]],"date-time":"2024-07-27T05:11:44Z","timestamp":1722057104000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10597501\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,24]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/ur61395.2024.10597501","relation":{},"subject":[],"published":{"date-parts":[[2024,6,24]]}}}