{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T19:39:10Z","timestamp":1769542750472,"version":"3.49.0"},"reference-count":64,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"ABRC","award":["RFGA2024-022-013"],"award-info":[{"award-number":["RFGA2024-022-013"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Robot."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/tro.2026.3651678","type":"journal-article","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T22:05:09Z","timestamp":1768255509000},"page":"750-769","source":"Crossref","is-referenced-by-count":0,"title":["Safe MPC Alignment With Human Directional Feedback"],"prefix":"10.1109","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-8436-2665","authenticated-orcid":false,"given":"Zhixian","family":"Xie","sequence":"first","affiliation":[{"name":"Intelligent Robotics and Interactive Systems (IRIS) Lab, School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4046-2213","authenticated-orcid":false,"given":"Wenlong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Polytechnic School, Arizona State University, Tempe, AZ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2306-5993","authenticated-orcid":false,"given":"Yi","family":"Ren","sequence":"additional","affiliation":[{"name":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA"}]},{"given":"Zhaoran","family":"Wang","sequence":"additional","affiliation":[{"name":"Departments of Industrial Engineering and Management Sciences, Northwestern University, Evanston, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9081-0637","authenticated-orcid":false,"given":"George J.","family":"Pappas","sequence":"additional","affiliation":[{"name":"Department of Electrical and Systems Engineering, University of Pennsylvania, Philadelphia, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5330-855X","authenticated-orcid":false,"given":"Wanxin","family":"Jin","sequence":"additional","affiliation":[{"name":"Intelligent Robotics and Interactive Systems (IRIS) Lab, School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2023.3291885"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3457538"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-44051-0_14"},{"key":"ref4","first-page":"2386","article-title":"Learning safety constraints from demonstrations with unknown rewards","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Lindner","year":"2024"},{"key":"ref5","article-title":"Maximum likelihood constraint inference for inverse reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Scobee","year":"2020"},{"key":"ref6","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.053"},{"key":"ref8","article-title":"Reward learning from human preferences and demonstrations in atari","volume":"31","author":"Ibarz","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref9","first-page":"6152","article-title":"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","volume":"139","author":"Lee","year":"2021"},{"key":"ref10","first-page":"2014","article-title":"Few-shot preference learning for human-in-the-loop RL","volume-title":"Proc. Conf. Robot Learn.","author":"Hejna III","year":"2023"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160439"},{"key":"ref12","first-page":"342","article-title":"Learning multimodal rewards from rankings","volume-title":"Proc. 5th Conf. Robot Learn.","author":"Myers","year":"2022"},{"key":"ref13","first-page":"217","article-title":"Learning robot objectives from physical human interaction","volume-title":"Proc. Conf. Robot Learn.","author":"Bajcsy","year":"2017"},{"key":"ref14","first-page":"123","article-title":"Including uncertainty when learning from human corrections","volume-title":"Proc. Conf. Robot Learn.","author":"Losey","year":"2018"},{"key":"ref15","first-page":"141","article-title":"Learning from physical human corrections, one feature at a time","volume-title":"Proc. ACM\/IEEE Int. Conf. Hum.-Robot Interact.","author":"Bajcsy","year":"2018"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793554"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560829"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1177\/02783649211050958"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3623384"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2022.3190221"},{"key":"ref21","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Y. Ng","year":"2000"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143936"},{"key":"ref24","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume-title":"Proc. AAAI Conf. Artif. Intell.","volume":"8","author":"Ziebart","year":"2008"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-009-9170-7"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917745980"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-100819-063206"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1177\/0278364915581193"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.2307\/2334029"},{"key":"ref30","article-title":"Contrastive prefence learning: Learning from human feedback without RL","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hejna","year":"2024"},{"key":"ref31","article-title":"Shared autonomy for robotic manipulation with language corrections","volume-title":"Proc. ACL Workshop Learn. Natural Lang. Supervision","author":"Karamcheti","year":"2022"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3568162.3578623"},{"key":"ref33","first-page":"32561","article-title":"Generating language corrections for teaching physical control tasks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Srivastava","year":"2023"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610455"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.025"},{"key":"ref36","first-page":"1211","article-title":"Learning parametric constraints in high dimensions from demonstrations","volume-title":"Proc. Conf. Robot Learn.","author":"Chou","year":"2020"},{"key":"ref37","article-title":"Learning soft constraints from constrained expert demonstrations","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Gaurav","year":"2023"},{"key":"ref38","first-page":"5808","article-title":"Learning shared safety constraints from multi-task demonstrations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Kim","year":"2023"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974427"},{"key":"ref40","first-page":"1612","article-title":"Uncertainty-aware constraint learning for adaptive safe motion planning from demonstrations","volume-title":"Proc. Conf. Robot Learn.","author":"Chou","year":"2021"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3148436"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811705"},{"key":"ref43","first-page":"7390","article-title":"Inverse constrained reinforcement learning","volume-title":"Int. Conf. Mach. Learn.","author":"Malik","year":"2021"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CCTA48906.2021.9658862"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-024-06653-5"},{"key":"ref46","first-page":"1005","article-title":"Inferring task goals and constraints using Bayesian nonparametric inverse reinforcement learning","volume-title":"Proc. Conf. robot Learn.","author":"Park","year":"2020"},{"key":"ref47","article-title":"Bayesian methods for constraint inference in reinforcement learning","author":"Papadimitriou","year":"2022","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref48","first-page":"16034","article-title":"Safe pontryagin differentiable programming","volume":"34","author":"Jin","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref49","first-page":"738","article-title":"A dual representation framework for robot learning with human guidance","volume-title":"Proc. Conf. Robot Learn.","author":"Zhang","year":"2023"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/HRI53351.2022.9889650"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2971415"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/1247069.1247123"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/s00454-023-00497-x"},{"key":"ref54","first-page":"45","article-title":"Support vector machine active learning with applications to text classification","volume":"2","author":"Tong","year":"2001","journal-title":"J. Mach. Learn. Res."},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441"},{"key":"ref56","article-title":"CVX: Matlab software for disciplined convex programming","year":"2012"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i5.25740"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00025"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/3354139"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.65109\/aywk9567"},{"key":"ref62","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. 3rd Int. Conf. Learn. Representations","author":"Kingma","year":"2015"},{"key":"ref63","first-page":"226","article-title":"The method of inscribed ellipsoids","volume-title":"Soviet Mathematics-Doklady","volume":"37","author":"Tarasov","year":"1988"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.012"}],"container-title":["IEEE Transactions on Robotics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8860\/11297026\/11342351.pdf?arnumber=11342351","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T06:10:09Z","timestamp":1769494209000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11342351\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":64,"URL":"https:\/\/doi.org\/10.1109\/tro.2026.3651678","relation":{},"ISSN":["1552-3098","1941-0468"],"issn-type":[{"value":"1552-3098","type":"print"},{"value":"1941-0468","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}