{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T20:02:53Z","timestamp":1780084973219,"version":"3.54.0"},"reference-count":62,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["IIS-2339769,CCF-2344955"],"award-info":[{"award-number":["IIS-2339769,CCF-2344955"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128012","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"7407-7414","source":"Crossref","is-referenced-by-count":1,"title":["Data-Efficient Learning from Human Interventions for Mobile Robots"],"prefix":"10.1109","author":[{"given":"Zhenghao","family":"Peng","sequence":"first","affiliation":[{"name":"University of California,Department of Computer Science,Los Angeles"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhizheng","family":"Liu","sequence":"additional","affiliation":[{"name":"University of California,Department of Computer Science,Los Angeles"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bolei","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of California,Department of Computer Science,Los Angeles"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3390\/su15032774"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s43154-022-00095-4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/1938965520923961"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196602"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3068639"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161302"},{"key":"ref7","article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","author":"Brohan","year":"2023","journal-title":"arXiv preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610665"},{"key":"ref9","article-title":"Scalable agent alignment via reward modeling: a research direction","author":"Leike","year":"2018","journal-title":"arXiv preprint"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86144-5_3"},{"key":"ref11","article-title":"The effects of reward misspecification: Mapping and mitigating misaligned models","volume-title":"International Conference on Learning Representations","author":"Pan","year":"2022"},{"key":"ref12","first-page":"4565","article-title":"Generative adversarial imitation learning","volume-title":"Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5\u201310, 2016","author":"Ho","year":"2016"},{"key":"ref13","article-title":"Learning robust rewards with adverserial inverse reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Fu","year":"2018"},{"key":"ref14","first-page":"661","article-title":"Efficient reductions for imitation learning","volume-title":"Proceedings of the thirteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings","author":"Ross","year":"2010"},{"issue":"136","key":"ref15","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Wirth","year":"2017","journal-title":"Journal of Machine Learning Research"},{"key":"ref16","first-page":"4299","article-title":"Deep reinforcement learning from human preferences","volume-title":"Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4\u20139, 2017","author":"Christiano"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2018.xiv.005"},{"key":"ref18","first-page":"1545","article-title":"Deep TAMER: interactive agent shaping in high-dimensional state spaces","volume-title":"Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence, (AAAI-18), the 30th innovative Applications of Artificial Intelligence (IAAI-18), and the 8th AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2\u20137, 2018","author":"Warnell","year":"2018"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.023"},{"key":"ref20","article-title":"Widening the pipeline in human-guided reinforcement learning with explanation and context-aware data augmentation","volume":"34","author":"Guan","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref21","article-title":"Training language models to follow instructions with human feedback","author":"Ouyang","year":"2022","journal-title":"arXiv preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793698"},{"key":"ref23","article-title":"Learning from interventions","author":"Spencer","year":"2020","journal-title":"Robotics: Science and Systems (RSS)"},{"key":"ref24","article-title":"Human-in-the-loop imitation learning using remote teleoperation","volume":"abs\/2012.06733","author":"Mandlekar","year":"2020","journal-title":"ArXiv preprint"},{"key":"ref25","article-title":"Efficient learning of safe driving policy via human-ai copilot optimization","volume-title":"International Conference on Learning Representations","author":"Li","year":"2022"},{"key":"ref26","article-title":"Learning from active human involvement through proxy value propagation","volume-title":"Thirtyseventh Conference on Neural Information Processing Systems","author":"Peng","year":"2023"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.commtr.2024.100127"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342038"},{"key":"ref29","article-title":"Sft memorizes, rl generalizes: A comparative study of foundation model post-training","author":"Chu","year":"2025","journal-title":"arXiv preprint"},{"key":"ref30","first-page":"20 132","article-title":"A minimalist approach to offline reinforcement learning","volume":"34","author":"Fujimoto","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10857"},{"key":"ref32","article-title":"Agent-agnostic human-in-the-loop reinforcement learning","volume":"abs\/1701.04079","author":"Abel","year":"2017","journal-title":"ArXiv preprint"},{"key":"ref33","first-page":"2067","article-title":"Trial without error: Towards safe reinforcement learning via human intervention","volume-title":"Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems.","author":"Saunders","year":"2018"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445563"},{"key":"ref35","first-page":"332","article-title":"Look before you leap: Safe model-based reinforcement learning with human intervention","author":"Xu","year":"2022","journal-title":"Conference on Robot Learning. PMLR"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561311"},{"key":"ref37","first-page":"2285","article-title":"Interactive learning from policy-dependent human feedback","author":"MacGlashan","year":"2017","journal-title":"International conference on machine learning. PMLR"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968287"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636748"},{"key":"ref40","article-title":"Rlif: Interactive imitation learning as reinforcement learning","author":"Luo","year":"2023","journal-title":"arXiv preprint"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2015.2463671"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref44","article-title":"End to end learning for self-driving cars","volume":"abs\/1604.07316","author":"Bojarski","year":"2016","journal-title":"ArXiv preprint"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989182"},{"key":"ref46","article-title":"Driving policy transfer via modularity and abstraction","author":"M\u00fcller","year":"2018","journal-title":"arXiv preprint"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adf6991"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2869644"},{"key":"ref49","article-title":"Learning from active human involvement through proxy value propagation","volume":"36","author":"Peng","year":"2024","journal-title":"Advances in neural information processing systems"},{"key":"ref50","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","volume":"abs\/2005.01643","author":"Levine","year":"2020","journal-title":"ArXiv preprint"},{"key":"ref51","article-title":"How to leverage unlabeled data in offline reinforcement learning","author":"Yu","year":"2022","journal-title":"arXiv preprint"},{"key":"ref52","first-page":"1582","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10\u201315, 2018, ser. Proceedings of Machine Learning Research","volume":"80","author":"Fujimoto","year":"2018"},{"issue":"268","key":"ref53","first-page":"1","article-title":"Stable-baselines3: Reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"Journal of Machine Learning Research"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3190471"},{"key":"ref55","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref56","article-title":"Safe driving via expert guided policy optimization","volume-title":"5th Annual Conference on Robot Learning","author":"Peng","year":"2021"},{"key":"ref57","article-title":"Guarded policy optimization with imperfect online demonstrations","volume-title":"The Eleventh International Conference on Learning Representations","author":"Xue","year":"2023"},{"key":"ref58","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings","author":"Ross","year":"2011"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161302"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00628"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161227"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128012.pdf?arnumber=11128012","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T19:52:05Z","timestamp":1780084325000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128012\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":62,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128012","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}