{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T18:09:57Z","timestamp":1772302197493,"version":"3.50.1"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["2218760,2132847"],"award-info":[{"award-number":["2218760,2132847"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128740","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"11546-11553","source":"Crossref","is-referenced-by-count":1,"title":["Efficiently Generating Expressive Quadruped Behaviors via Language-Guided Preference Learning"],"prefix":"10.1109","author":[{"given":"Jaden","family":"Clark","sequence":"first","affiliation":[]},{"given":"Joey","family":"Hejna","sequence":"additional","affiliation":[]},{"given":"Dorsa","family":"Sadigh","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref1","first-page":"22","article-title":"Walk these ways: Tuning robot control for generalization with multiplicity of behavior","volume-title":"Conference on Robot Learning","author":"Margolis","year":"2023"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.011"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abc5986"},{"key":"ref4","article-title":"Inverse reward design","volume":"30","author":"Hadfield-Menell","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref5","article-title":"Reward design with language models","author":"Kwon","year":"2023","journal-title":"arXiv preprint"},{"key":"ref6","article-title":"Language to rewards for robotic skill synthesis","author":"Yu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref7","article-title":"Saytap: Language to quadrupedal locomotion","author":"Tang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref8","article-title":"Eureka: Human-level reward design via coding large language models","author":"Ma","year":"2023","journal-title":"arXiv preprint"},{"key":"ref9","first-page":"2285","article-title":"Interactive learning from policy-dependent human feedback","volume-title":"International conference on machine learning","author":"MacGlashan","year":"2017"},{"key":"ref10","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref11","article-title":"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training","author":"Lee","year":"2021","journal-title":"arXiv preprint"},{"key":"ref12","first-page":"2014","article-title":"Few-shot preference learning for human-in-the-loop rl","volume-title":"Conference on Robot Learning","author":"Hejna","year":"2023"},{"key":"ref13","article-title":"Barkour: Bench-marking animal-level agility with quadruped robots","author":"Caluwaerts","year":"2023","journal-title":"arXiv preprint"},{"key":"ref14","article-title":"Minimizing energy consumption leads to the emergence of gaits in legged robots","author":"Fu","year":"2021","journal-title":"arXiv preprint"},{"key":"ref15","first-page":"773","article-title":"Fast and efficient locomotion via learned gait transitions","volume-title":"Conference on robot learning","author":"Yang","year":"2022"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref17","first-page":"287","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","volume-title":"Conference on robot learning","author":"Brohan","year":"2023"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"482","DOI":"10.1145\/3610977.3634999","article-title":"Generative expressive robot behaviors using large language models","volume-title":"Proceedings of the 2024 ACM\/IEEE International Conference on Human-Robot Interaction","author":"Mahadevan","year":"2024"},{"key":"ref19","article-title":"Text2interaction: Establishing safe and preferable human-robot interaction","author":"Thumm","year":"2024","journal-title":"arXiv preprint"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.125"},{"key":"ref21","first-page":"13584","article-title":"Language instructed reinforcement learning for human-ai coordination","volume-title":"International Conference on Machine Learning","author":"Hu","year":"2023"},{"key":"ref22","article-title":"Open problems and fundamental limitations of reinforcement learning from human feedback","author":"Casper","year":"2023","journal-title":"arXiv preprint"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560829"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1597735.1597738"},{"key":"ref25","first-page":"342","article-title":"Learning multimodal rewards from rankings","volume-title":"Conference on robot learning","author":"Myers","year":"2022"},{"key":"ref26","first-page":"783","article-title":"Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations","volume-title":"International conference on machine learning","author":"Brown","year":"2019"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.053"},{"key":"ref28","article-title":"Fine-tuning language models from human preferences. arxiv 2019","author":"Ziegler","year":"1909","journal-title":"arXiv preprint"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"572","DOI":"10.1145\/3610977.3634930","article-title":"Preference-conditioned language-guided abstraction","volume-title":"Proceedings of the 2024 ACM\/IEEE International Conference on Human-Robot Interaction","author":"Peng","year":"2024"},{"key":"ref30","article-title":"Maple: A framework for active preference learning guided by large language models","author":"Mahmud","year":"2024","journal-title":"arXiv preprint"},{"key":"ref31","first-page":"519","article-title":"Batch active preference-based learning of reward functions","volume-title":"Conference on robot learning","author":"Biyik","year":"2018"},{"key":"ref32","article-title":"Few-shot in-context preference learning using large language models","author":"Yu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref33","article-title":"B-pref: Bench-marking preference-based reinforcement learning","author":"Lee","year":"2021","journal-title":"arXiv preprint"},{"key":"ref34","article-title":"A bayesian approach for policy learning from trajectory preference queries","volume":"25","author":"Wilson","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"},{"key":"ref36","first-page":"1165","article-title":"Safe imitation learning via fast bayesian reward inference from preferences","volume-title":"International Conference on Machine Learning","author":"Brown","year":"2020"},{"key":"ref37","article-title":"Surf: Semi-supervised reward learning with data augmentation for feedback-efficient preference-based reinforcement learning","author":"Park","year":"2022","journal-title":"arXiv preprint"},{"key":"ref38","article-title":"Isaac gym: High performance gpu-based physics simulation for robot learning","author":"Makoviychuk","year":"2021","journal-title":"arXiv preprint"},{"key":"ref39","article-title":"Prox-imal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref40","article-title":"Gpt-4 technical report","author":"Achiam","year":"2023","journal-title":"arXiv preprint"},{"key":"ref41","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128740.pdf?arnumber=11128740","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:11:58Z","timestamp":1756879918000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128740\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128740","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}