{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:32:49Z","timestamp":1763191969817,"version":"3.45.0"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11228808","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["FLIP: Adaptive Comparison Method Selection for Efficient Preference-Based Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Ziang","family":"Liu","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University"}]},{"given":"Xingjiao","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Pharmacy, East China Normal University"}]},{"given":"Hongxin","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University"}]},{"given":"Luwei","family":"Xiao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University"}]},{"given":"Jing","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-021-04301-9"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06419-4"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461039"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460655"},{"key":"ref6","article-title":"Inverse reward design","volume":"30","author":"Hadfield-Menell","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","first-page":"21 406","article-title":"Avoiding side effects in complex environments","volume":"33","author":"Turner","year":"2020","journal-title":"NeurIPS"},{"key":"ref8","first-page":"9460","article-title":"Defining and characterizing reward gaming","volume":"35","author":"Skalse","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-024-10393-y"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-76827-9_11"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.289"},{"key":"ref12","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"NeurIPS"},{"article-title":"Open problems and fundamental limitations of reinforcement learning from human feedback","year":"2023","author":"Casper","key":"ref13"},{"key":"ref14","first-page":"3008","article-title":"Learning to summarize with human feedback","volume":"33","author":"Stiennon","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022.3204972"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102726"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1177\/02783649211041652"},{"key":"ref18","first-page":"22 270","article-title":"Meta-reward-net: Implicitly differentiable reward learning for preference-based reinforcement learning","volume":"35","author":"Liu","year":"2022","journal-title":"NeurIPS"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2024.3523322"},{"article-title":"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training","volume-title":"Proceedings of International Conference on Machine Learning","author":"Lee","key":"ref20"},{"key":"ref21","article-title":"Reward uncertainty for exploration in preference-based reinforcement learning","author":"Liang","year":"2021","journal-title":"ICLR"},{"key":"ref22","article-title":"SURF: Semi-supervised reward learning with data augmentation for feedback-efficient preference-based reinforcement learning","author":"Park","year":"2022","journal-title":"ICLR"},{"key":"ref23","first-page":"2014","article-title":"Few-shot preference learning for human-in-the-loop rl","volume-title":"Conference on Robot Learning","author":"Hejna III"},{"article-title":"B-pref: Benchmarking preference-based reinforcement learning","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track","author":"Lee","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3406"},{"article-title":"Deepmind control suite","year":"2018","author":"Tassa","key":"ref26"},{"key":"ref27","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Conference on Robot Learning","author":"Yu"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-022-06295-5"},{"key":"ref29","article-title":"Flow to better: Offline preference-based reinforcement learning via preferred trajectory generation","author":"Zhang","year":"2023","journal-title":"ICLR"},{"key":"ref30","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proceedings of International Conference on Machine Learning","author":"Haarnoja"},{"article-title":"Rime: Robust preference-based reinforcement learning with noisy preferences","year":"2024","author":"Cheng","key":"ref31"},{"article-title":"Iterative preference learning from human feedback: Bridging theory and practice for rlhf under kl-constraint","volume-title":"Proceedings of International Conference on Machine Learning","author":"Xiong","key":"ref32"},{"key":"ref33","article-title":"Hindsight priors for reward learning from human preferences","author":"Verma","year":"2024","journal-title":"ICLR"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i9.28886"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/icra55743.2025.11127694"},{"key":"ref36","article-title":"Preference transformer: Modeling human preferences using transformers for rl","author":"Kim","year":"2023","journal-title":"ICLR"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i15.29666"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2025,6,30]]},"location":"Rome, Italy","end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11228808.pdf?arnumber=11228808","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:28:36Z","timestamp":1763191716000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11228808\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11228808","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}