{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T04:40:51Z","timestamp":1775709651909,"version":"3.50.1"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62273280"],"award-info":[{"award-number":["62273280"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62373305"],"award-info":[{"award-number":["62373305"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Aerospace Flight Dynamics Technology Key Laboratory Foundation of China","award":["KJW6142210210303"],"award-info":[{"award-number":["KJW6142210210303"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1109\/lra.2024.3418275","type":"journal-article","created":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T20:10:31Z","timestamp":1719259831000},"page":"7150-7157","source":"Crossref","is-referenced-by-count":3,"title":["Aligning Human Intent From Imperfect Demonstrations With Confidence-Based Inverse Soft-Q Learning"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8132-4599","authenticated-orcid":false,"given":"Xizhou","family":"Bu","sequence":"first","affiliation":[{"name":"School of Astronautics, Northwestern Polytechnical University, Xi'an, China"}]},{"given":"Wenjuan","family":"Li","sequence":"additional","affiliation":[{"name":"School of Astronautics, Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9427-4066","authenticated-orcid":false,"given":"Zhengxiong","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Astronautics, Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6791-1162","authenticated-orcid":false,"given":"Zhiqiang","family":"Ma","sequence":"additional","affiliation":[{"name":"School of Astronautics, Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5132-9602","authenticated-orcid":false,"given":"Panfeng","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Astronautics, Northwestern Polytechnical University, Xi'an, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487517"},{"key":"ref2","first-page":"1678","article-title":"What matters in learning from offline human demonstrations for robot manipulation","volume-title":"Proc. 5th Conf. Robot Learn.","volume":"164","author":"Mandlekar","year":"2022"},{"key":"ref3","first-page":"1732","article-title":"Imitation learning by estimating expertise of demonstrators","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Beliaev","year":"2022"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968114"},{"key":"ref5","first-page":"9407","article-title":"VILD: Variational imitation learning with diverse-quality demonstrations","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","volume":"119","author":"Tangkaratt","year":"2020"},{"key":"ref6","first-page":"783","article-title":"Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Brown","year":"2019"},{"key":"ref7","first-page":"330","article-title":"Better-than-demonstrator imitation learning via automatically-ranked demonstrations","volume-title":"Proc. Conf. Robot Learn.","author":"Brown","year":"2020"},{"key":"ref8","first-page":"1262","article-title":"Learning from suboptimal demonstration via self-supervised reward regression","volume-title":"Proc. Conf. Robot Learn.","author":"Chen","year":"2021"},{"key":"ref9","first-page":"10961","article-title":"Learning to weight imperfect demonstrations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang","year":"2021"},{"key":"ref10","first-page":"6818","article-title":"Imitation learning from imperfect demonstration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wu","year":"2019"},{"key":"ref11","first-page":"12340","article-title":"Confidence-aware imitation learning from demonstrations with varying optimality","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Zhang","year":"2021"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3068912"},{"key":"ref13","first-page":"3581","article-title":"Semi-supervised learning with deep generative models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"27","author":"Kingma","year":"2014"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1093\/oso\/9780198538677.003.0006"},{"key":"ref15","first-page":"661","article-title":"Efficient reductions for imitation learning","volume-title":"Proc. 13th Int. Conf. Artif. Intell. Statist. JMLR Workshop Conf. Proc.","author":"Ross","year":"2010"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref17","first-page":"15737","article-title":"Error bounds of imitating policies and environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Xu","year":"2020"},{"key":"ref18","first-page":"4565","article-title":"Generative adversarial imitation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Ho","year":"2016"},{"key":"ref19","first-page":"2817","article-title":"Robust adversarial reinforcement learning","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","volume":"70","author":"Pinto","year":"2017"},{"key":"ref20","first-page":"390","article-title":"End-to-end differentiable adversarial imitation learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Baram","year":"2017"},{"key":"ref21","first-page":"4028","article-title":"IQ-learn: Inverse soft-Q learning for imitation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Garg","year":"2021"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s11023-020-09539-2"},{"key":"ref23","first-page":"10835","article-title":"Scaling laws for reward model overoptimization","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","volume":"202","author":"Gao","year":"2023"},{"key":"ref24","first-page":"12004","article-title":"Goal misgeneralization in deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Langosco","year":"2022"},{"key":"ref25","article-title":"AI alignment: A comprehensive survey","author":"Ji","year":"2023"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/591"},{"key":"ref27","article-title":"Open problems in cooperative AI","author":"Dafoe","year":"2020"},{"key":"ref28","article-title":"Frontier AI regulation: Managing emerging risks to public safety","author":"Anderljung","year":"2023"},{"key":"ref29","article-title":"Model evaluation for extreme risks","author":"Shevlane","year":"2023"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1177\/02783649221078031"},{"key":"ref31","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman","year":"2014"},{"key":"ref32","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref33","article-title":"Inverse reinforcement learning with multiple ranked experts","author":"Castro","year":"2019"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/10561888\/10569029.pdf?arnumber=10569029","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:34:30Z","timestamp":1734982470000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10569029\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8]]},"references-count":33,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/lra.2024.3418275","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8]]}}}