{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T03:05:40Z","timestamp":1763348740997,"version":"3.44.0"},"reference-count":55,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11127633","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"4789-4796","source":"Crossref","is-referenced-by-count":1,"title":["Flora: Sample-Efficient Preference-Based Rl Via Low-Rank Style Adaptation of Reward Functions"],"prefix":"10.1109","author":[{"given":"Daniel","family":"Marta","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}]},{"given":"Simon","family":"Holk","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}]},{"given":"Miguel","family":"Vasco","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}]},{"given":"Jens","family":"Lundell","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}]},{"given":"Timon","family":"Homberger","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}]},{"given":"Finn","family":"Busch","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}]},{"given":"Olov","family":"Andersson","sequence":"additional","affiliation":[{"name":"KTH Royal 
Institute of Technology,Sweden"}]},{"given":"Danica","family":"Kragic","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}]},{"given":"Iolanda","family":"Leite","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Sweden"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Open x-embodiment: Robotic learning datasets and rt-x models","author":"Padalkar","year":"2023","journal-title":"arXiv preprint"},{"key":"ref2","first-page":"270","article-title":"Gpu-accelerated robotic simulation for distributed reinforcement learning","volume-title":"Conference on Robot Learning. PMLR","author":"Liang","year":"2018"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3270034"},{"issue":"136","key":"ref4","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth","year":"2017","journal-title":"Journal of Machine Learning Research"},{"key":"ref5","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref6","article-title":"Reward learning from human preferences and demonstrations in atari","volume":"31","author":"Ibarz","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref7","article-title":"Fine-tuning language models from human preferences","author":"Ziegler","year":"2019","journal-title":"arXiv preprint"},{"key":"ref8","article-title":"Measuring the intrinsic dimension of objective landscapes","author":"Li","year":"2018","journal-title":"arXiv preprint"},{"key":"ref9","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv preprint"},{"key":"ref10","article-title":"Lora: Low-rank adaptation of large language 
models","author":"Hu","year":"2021","journal-title":"arXiv preprint"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73232-4_24"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2019.00453"},{"key":"ref13","article-title":"Multi-lora composition for image generation","author":"Zhong","year":"2024","journal-title":"arXiv preprint"},{"key":"ref14","article-title":"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pretraining","author":"Lee","year":"2021","journal-title":"arXiv preprint"},{"key":"ref15","first-page":"2014","article-title":"Few-shot preference learning for human-in-the-loop rl","volume-title":"Conference on Robot Learning. PMLR","author":"Hejna","year":"2023"},{"key":"ref16","article-title":"Plan-seq-learn: Language model guided rl for solving long horizon robotics tasks","author":"Dalal","year":"2023","journal-title":"Towards Generalist Robots: Learning Paradigms for Scalable Skill Acquisition@ CoRL2023"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10087-8"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.568"},{"key":"ref19","article-title":"Few-shot preference learning for human-in-the-loop rl","author":"Hejna","year":"2022","journal-title":"arXiv preprint"},{"key":"ref20","first-page":"3597","article-title":"Learning human contribution preferences in collaborative human-robot tasks","volume-title":"Conference on Robot Learning. 
PMLR","author":"Zhao","year":"2023"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634970"},{"key":"ref22","article-title":"Reward uncertainty for exploration in preference-based reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Liang","year":"2022"},{"key":"ref23","article-title":"SURF: Semi-supervised reward learning with data augmentation for feedback-efficient preference-based reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Park","year":"2022"},{"key":"ref24","article-title":"Explaining preferences with shapley values","volume-title":"Advances in Neural Information Processing Systems","author":"Hu","year":"2022"},{"key":"ref25","article-title":"Repeated inverse reinforcement learning","volume":"30","author":"Amin","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref26","article-title":"Preference transformer: Modeling human preferences using transformers for RL","volume-title":"The Eleventh International Conference on Learning Representations","author":"Kim","year":"2023"},{"key":"ref27","first-page":"22270","article-title":"Meta-reward-net: Implicitly differentiable reward learning for preference-based reinforcement learning","volume":"35","author":"Liu","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref28","article-title":"Sequential preference ranking for efficient reinforcement learning from human feedback","volume":"36","author":"Hwang","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref29","article-title":"A study of causal confusion in preference-based reward learning","author":"Tien","year":"2022","journal-title":"arXiv preprint"},{"key":"ref30","article-title":"Concrete problems in ai safety","author":"Amodei","year":"2016","journal-title":"arXiv 
preprint"},{"key":"ref31","article-title":"Inverse reward design","volume":"30","author":"Hadfield-Menell","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref32","article-title":"Causal confusion in imitation learning","volume":"32","author":"De Haan","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref33","first-page":"32033","article-title":"Invariance in policy optimisation and partial identifiability in reward learning","volume-title":"International Conference on Machine Learning. PMLR","author":"Skalse","year":"2023"},{"key":"ref34","article-title":"On the fragility of learned reward functions","author":"McKinney","year":"2023","journal-title":"arXiv preprint"},{"key":"ref35","article-title":"A dissection of overfitting and generalization in continuous reinforcement learning","author":"Zhang","year":"2018","journal-title":"arXiv preprint"},{"key":"ref36","first-page":"1282","article-title":"Quantifying generalization in reinforcement learning","volume-title":"International Conference on Machine Learning. PMLR","author":"Cobbe","year":"2019"},{"key":"ref37","article-title":"Observational overfitting in reinforcement learning","author":"Song","year":"2019","journal-title":"arXiv preprint"},{"key":"ref38","first-page":"928","article-title":"Deft: Dexterous fine-tuning for hand policies","volume-title":"Conference on Robot Learning. 
PMLR","author":"Kannan","year":"2023"},{"key":"ref39","article-title":"Uncertainty estimation for language reward models","author":"Gleave","year":"2022","journal-title":"arXiv preprint"},{"key":"ref40","article-title":"Improving alignment of dialogue agents via targeted human judgements","author":"Glaese","year":"2022","journal-title":"arXiv preprint"},{"key":"ref41","article-title":"Aligning language models with preferences through f-divergence minimization","author":"Go","year":"2023","journal-title":"arXiv preprint"},{"key":"ref42","first-page":"10835","article-title":"Scaling laws for reward model overoptimization","volume-title":"International Conference on Machine Learning. PMLR","author":"Gao","year":"2023"},{"key":"ref43","article-title":"Open problems and fundamental limitations of reinforcement learning from human feedback","author":"Casper","year":"2023","journal-title":"arXiv preprint"},{"key":"ref44","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Conference on robot learning. PMLR","author":"Yu","year":"2020"},{"key":"ref45","article-title":"A bayesian approach for policy learning from trajectory preference queries","volume":"25","author":"Wilson","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"},{"key":"ref47","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv preprint"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpa.2020.100022"},{"key":"ref49","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics. 
JMLR workshop and Conference Proceedings","author":"Ross","year":"2011"},{"key":"ref50","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja","year":"2018"},{"key":"ref51","article-title":"On the opportunities and risks of foundation models","author":"Bommasani","year":"2021","journal-title":"arXiv preprint"},{"key":"ref52","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref53","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","volume-title":"International conference on machine learning. PMLR","author":"Jia","year":"2021"},{"journal-title":"Autort: Embodied foundation models for large scale orchestration of robotic agents","year":"2024","author":"Ahn","key":"ref54"},{"key":"ref55","article-title":"Octo: An open-source generalist robot policy","author":"Team","year":"2024","journal-title":"arXiv preprint"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2025,5,19]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation 
(ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11127633.pdf?arnumber=11127633","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:11:43Z","timestamp":1756879903000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11127633\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":55,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11127633","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}