{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:13:58Z","timestamp":1772907238010,"version":"3.50.1"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T00:00:00Z","timestamp":1634601600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T00:00:00Z","timestamp":1634601600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01EB019335"],"award-info":[{"award-number":["R01EB019335"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1544797"],"award-info":[{"award-number":["1544797"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1637748"],"award-info":[{"award-number":["1637748"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Robot"],"published-print":{"date-parts":[[2022,1]]},"DOI":"10.1007\/s10514-021-10006-9","type":"journal-article","created":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T12:24:24Z","timestamp":1634646264000},"page":"99-113","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":26,"title":["Expert Intervention Learning"],"prefix":"10.1007","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5894-132X","authenticated-orcid":false,"given":"Jonathan","family":"Spencer","sequence":"first","affiliation":[]},{"given":"Sanjiban","family":"Choudhury","sequence":"additional","affiliation":[]},{"given":"Matthew","family":"Barnes","sequence":"additional","affiliation":[]},{"given":"Matthew","family":"Schmittle","sequence":"additional","affiliation":[]},{"given":"Mung","family":"Chiang","sequence":"additional","affiliation":[]},{"given":"Peter","family":"Ramadge","sequence":"additional","affiliation":[]},{"given":"Sidd","family":"Srinivasa","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,19]]},"reference":[{"key":"10006_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., & Ng, A.Y. (2004). Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the twenty-first International Conference on Machine learning (ICML)","DOI":"10.1145\/1015330.1015430"},{"key":"10006_CR2","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1142\/S0218195995000064","volume":"5","author":"H Alt","year":"1995","unstructured":"Alt, H., & Godau, M. (1995). Computing the Fr\u00e9chet distance between two polygonal curves. International Journal of Computational Geometry & Applications, 5, 75\u201391.","journal-title":"International Journal of Computational Geometry & Applications"},{"key":"10006_CR3","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1609\/aimag.v35i4.2513","volume":"35","author":"S Amershi","year":"2014","unstructured":"Amershi, S., Cakmak, M., Knox, W. B., & Kulesza, T. (2014). Power to the people: The role of humans in interactive machine learning. AI Magazine, 35, 105\u2013120.","journal-title":"AI Magazine"},{"key":"10006_CR4","doi-asserted-by":"crossref","unstructured":"Argall, B. D., Chernova, S., Veloso, M., & Browning, B. (2009). A survey of robot learning from demonstration. Robotics and Autonomous Systems.","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"10006_CR5","doi-asserted-by":"crossref","unstructured":"Bajcsy, A., Losey, D.P., O\u2019Malley, M.K., & Dragan, A.D. (2018). Learning from physical human corrections, one feature at a time. In: Proceedings of the 2018 ACM\/IEEE International Conference on Human-Robot Interaction (HRI)","DOI":"10.1145\/3171221.3171267"},{"key":"10006_CR6","unstructured":"Bajcsy, A., Losey, D.P., O\u2019Malley, M.K., & Dragan, A.D. (2017). Learning robot objectives from physical human interaction. In: Proceedings of the 1st Annual Conference on Robot Learning (CoRL). PMLR"},{"key":"10006_CR7","doi-asserted-by":"publisher","first-page":"10352","DOI":"10.1609\/aaai.v34i06.6602","volume":"34\u201306","author":"J Bi","year":"2020","unstructured":"Bi, J., Dhiman, V., Xiao, T., & Xu, C. (2020). Learning from interventions using hierarchical policies for safe learning. Proceedings of the AAAI Conference on Artificial Intelligence, 34\u201306, 10352\u201310360.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10006_CR8","unstructured":"Bi, J., Xiao, T., Sun, Q., & Xu, C. (2018). Navigation by imitation in a pedestrian-rich environment. arXiv preprint arXiv:1811.00506"},{"key":"10006_CR9","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/s10846-018-0839-z","volume":"95","author":"C Celemin","year":"2019","unstructured":"Celemin, C., & Ruiz-del Solar, J. (2019). An interactive framework for learning continuous actions policies based on corrective feedback. Journal of Intelligent & Robotic Systems, 95, 77\u201397.","journal-title":"Journal of Intelligent & Robotic Systems"},{"key":"10006_CR10","doi-asserted-by":"crossref","unstructured":"Chen, M., Nikolaidis, S., Soh, H., Hsu, D., & Srinivasa, S. (2018). Planning with trust for human-robot collaboration. In: Proceedings of the 2018 ACM\/IEEE International Conference on Human-Robot Interaction (HRI)","DOI":"10.1145\/3171221.3171264"},{"key":"10006_CR11","doi-asserted-by":"crossref","unstructured":"Chernova, S., & Veloso, M. (2009). Interactive policy learning through confidence-based autonomy. Journal of Artificial Intelligence Research, 34, 1\u201325.","DOI":"10.1613\/jair.2584"},{"key":"10006_CR12","doi-asserted-by":"crossref","unstructured":"Choudhury, S., Dugar, V., Maeta, S., MacAllister, B., Arora, S., Althoff, D., & Scherer, S. (2019). High performance and safe flight of full-scale helicopters from takeoff to landing with an ensemble of planners. Journal of Field Robotics (JFR), 36(8), 1275\u20131332.","DOI":"10.1002\/rob.21906"},{"key":"10006_CR13","doi-asserted-by":"crossref","unstructured":"Daum\u00e9 III, H., Langford, J., & Marcu, D. (2009). Search-based structured prediction. Machine Learning Journal (MLJ), 75(3), 297\u2013325.","DOI":"10.1007\/s10994-009-5106-x"},{"key":"10006_CR14","unstructured":"Dhariwal, P., Hesse, C., Klimov, O., Nichol, A., Plappert, M., Radford, A., Schulman, J., Sidor, S., Wu, Y., & Zhokhov, P. (2017). Openai baselines. https:\/\/github.com\/openai\/baselines"},{"key":"10006_CR15","doi-asserted-by":"crossref","unstructured":"Fisac, J.F., Gates, M.A., Hamrick, J.B., Liu, C., Hadfield-Menell, D., Palaniappan, M., Malik, D., Sastry, S.S., Griffiths, T.L., & Dragan, A.D. (2019). Pragmatic-pedagogic value alignment. Robotics Research p. 49-57.","DOI":"10.1007\/978-3-030-28619-4_7"},{"key":"10006_CR16","doi-asserted-by":"publisher","first-page":"2462","DOI":"10.1609\/aaai.v33i01.33012462","volume":"33","author":"VG Goecks","year":"2019","unstructured":"Goecks, V. G., Gremillion, G. M., Lawhern, V. J., Valasek, J., & Waytowich, N. R. (2019). Efficiently combining human demonstrations and interventions for safe training of autonomous systems in real-time. Proceedings of the AAAI Conference on Artificial Intelligence, 33, 2462\u20132470.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10006_CR17","doi-asserted-by":"crossref","unstructured":"Grollman, D.H., & Jenkins, O.C. (2007). Dogged learning for robots. In: Proceedings 2007 IEEE International Conference on Robotics and Automation (ICRA).","DOI":"10.1109\/ROBOT.2007.363692"},{"key":"10006_CR18","doi-asserted-by":"crossref","unstructured":"Gupta, S., Davidson, J., Levine, S., Sukthankar, R., & Malik, J. (2017). Cognitive mapping and planning for visual navigation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR.2017.769"},{"key":"10006_CR19","unstructured":"Hadfield-Menell, D., Russell, S.J., Abbeel, P., & Dragan, A. (2016). Cooperative inverse reinforcement learning. In: Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"10006_CR20","unstructured":"Jain, A., Wojcik, B., Joachims, T., & Saxena, A. (2013). Learning trajectory preferences for manipulators via iterative improvement. In: Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"10006_CR21","unstructured":"Judah, K., Fern, A.P., & Dietterich, T.G. (2012). Active imitation learning via reduction to iid active learning. In: 2012 AAAI Fall Symposium Series."},{"key":"10006_CR22","doi-asserted-by":"crossref","unstructured":"Kelly, M., Sidrane, C., Driggs-Campbell, K., & Kochenderfer, M.J. (2019). Hg-dagger: Interactive imitation learning with human experts. In: 2019 International Conference on Robotics and Automation (ICRA).","DOI":"10.1109\/ICRA.2019.8793698"},{"key":"10006_CR23","unstructured":"Kim, B., Farahmand, A., Pineau, J., & Precup, D. (2013). Learning from limited demonstrations. In: Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"10006_CR24","doi-asserted-by":"crossref","unstructured":"Kim, B., & Pineau, J. (2013). Maximum mean discrepancy imitation learning. In: Robotics: Science and Systems (RSS)","DOI":"10.15607\/RSS.2013.IX.038"},{"key":"10006_CR25","doi-asserted-by":"crossref","unstructured":"Kollmitz, M., Koller, T., Boedecker, J., & Burgard, W. (2020). Learning human-aware robot navigation from physical interaction via inverse reinforcement learning. In: 2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 11025\u201311031. IEEE","DOI":"10.1109\/IROS45743.2020.9340865"},{"key":"10006_CR26","doi-asserted-by":"crossref","unstructured":"Laskey, M., Chuck, C., Lee, J., Mahler, J., Krishnan, S., Jamieson, K., Dragan, A., & Goldberg, K. (2017). Comparing human-centric and robot-centric sampling for robot deep learning from demonstrations. In: IEEE International Conference on Robotics and Automation (ICRA).","DOI":"10.1109\/ICRA.2017.7989046"},{"key":"10006_CR27","unstructured":"Laskey, M., Lee, J., Hsieh, W., Liaw, R., Mahler, J., Fox, R., & Goldberg, K. (2017). Iterative noise injection for scalable imitation learning. arXiv preprint arXiv:1703.09327"},{"key":"10006_CR28","doi-asserted-by":"crossref","unstructured":"Laskey, M., Staszak, S., Hsieh, W.Y.S., Mahler, J., Pokorny, F.T., Dragan, A.D., & Goldberg, K. (2016). SHIV: Reducing supervisor burden in dagger using support vectors for efficient learning from demonstrations in high dimensional state spaces. In: 2016 IEEE International Conference on Robotics and Automation (ICRA).","DOI":"10.1109\/ICRA.2016.7487167"},{"key":"10006_CR29","doi-asserted-by":"crossref","unstructured":"Levine, S., Pastor, P., Krizhevsky, A., Ibarz, J., & Quillen, D. (2018). Learning hand-eye coordination for robotic grasping with deep learning and large-scale data collection. The International Journal of Robotics Research (IJRR).","DOI":"10.1007\/978-3-319-50115-4_16"},{"key":"10006_CR30","doi-asserted-by":"crossref","unstructured":"Loftin, R., Peng, B., MacGlashan, J., Littman, M. L., Taylor, M. E., Huang, J., & Roberts, D. L. (2016). Learning behaviors via human-delivered discrete feedback: modeling implicit feedback strategies to speed up learning. Autonomous Agents and Multi-Agent Systems.","DOI":"10.1007\/s10458-015-9283-7"},{"key":"10006_CR31","unstructured":"MacGlashan, J., Ho, M.K., Loftin, R., Peng, B., Wang, G., Roberts, D.L., Taylor, M.E., & Littman, M.L. (2017). Interactive learning from policy-dependent human feedback. In: Proceedings of the 34th International Conference on Machine Learning (ICML)."},{"key":"10006_CR32","doi-asserted-by":"crossref","unstructured":"McPherson, D.L., Scobee, D.R., Menke, J., Yang, A.Y., & Sastry, S.S. (2018). Modeling supervisor safe sets for improving collaboration in human-robot teams. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 861\u2013868. IEEE.","DOI":"10.1109\/IROS.2018.8593865"},{"key":"10006_CR33","doi-asserted-by":"crossref","unstructured":"Menda, K., Driggs-Campbell, K.R., & Kochenderfer, M.J. (2018). EnsembleDAgger: A Bayesian Approach to Safe Imitation Learning. arXiv preprint arXiv:1807.08364","DOI":"10.1109\/IROS40897.2019.8968287"},{"key":"10006_CR34","doi-asserted-by":"crossref","unstructured":"Osa, T., Pajarinen, J., Neumann, G., Bagnell, J.A., Abbeel, P., & Peters, J. (2018). An algorithmic perspective on imitation learning. Foundations and Trends in Robotics, 7(1-2), 1\u2013179.","DOI":"10.1561\/2300000053"},{"key":"10006_CR35","unstructured":"Packard, B., & Onta\u00f1\u00f3n, S. (2017). Policies for active learning from demonstration. In: 2017 AAAI Spring Symposium Series"},{"key":"10006_CR36","unstructured":"Pomerleau, D.A. (1989). Alvinn: An autonomous land vehicle in a neural network. In: Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"10006_CR37","unstructured":"Ross, S., Gordon, G., & Bagnell, D. (2011). A reduction of imitation learning and structured prediction to no-regret online learning. In: Proceedings of the fourteenth international conference on artificial intelligence and statistics (AIStats)."},{"key":"10006_CR38","doi-asserted-by":"crossref","unstructured":"Ross, S., Melik-Barkhudarov, N., Shankar, K.S., Wendel, A., Dey, D., Bagnell, J.A., & Hebert, M. (2013). Learning monocular reactive UAV control in cluttered natural environments. In: IEEE International Conference on Robotics and Automation (ICRA).","DOI":"10.1109\/ICRA.2013.6630809"},{"key":"10006_CR39","doi-asserted-by":"crossref","unstructured":"Sadat, A., Ren, M., Pokrovsky, A., Lin, Y.C., Yumer, E., & Urtasun, R. (2019). Jointly learnable behavior and trajectory planning for self-driving vehicles. arXiv preprint arXiv:1910.04586","DOI":"10.1109\/IROS40897.2019.8967615"},{"key":"10006_CR40","doi-asserted-by":"crossref","unstructured":"Sadigh, D., Sastry, S.S., Seshia, S.A., & Dragan, A. (2016). Information gathering actions over human internal state. In: 2016 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","DOI":"10.1109\/IROS.2016.7759036"},{"key":"10006_CR41","unstructured":"Saunders, W., Sastry, G., Stuhlmueller, A., & Evans, O. (2017). Trial without error: Towards safe reinforcement learning via human intervention. arXiv preprint arXiv:1707.05173"},{"issue":"2","key":"10006_CR42","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1561\/2200000018","volume":"4","author":"S Shalev-Shwartz","year":"2012","unstructured":"Shalev-Shwartz, S. (2012). Online learning and online convex optimization. Foundations and Trends in Machine Learning, 4(2), 107\u2013194.","journal-title":"Foundations and Trends in Machine Learning"},{"key":"10006_CR43","doi-asserted-by":"crossref","unstructured":"Spencer, J., Choudhury, S., Barnes, M., Schmittle, M., Chiang, M., Ramadge, P., & Srinivasa, S. (2020). Learning from interventions: Human-robot interaction as both explicit and implicit feedback. In: Robotics: Science and Systems (RSS).","DOI":"10.15607\/RSS.2020.XVI.055"},{"key":"10006_CR44","unstructured":"Spencer, J., Choudhury, S., Venkatraman, A., Ziebart, B., & Bagnell, J.A. (2021). Feedback in imitation learning: The three regimes of covariate shift. arXiv preprint arXiv:2102.02872"},{"key":"10006_CR45","unstructured":"Srinivasa, S.S., Lancaster, P., Michalove, J., Schmittle, M., Summers, C., Rockett, M., Smith, J.R., Choudhury, S., Mavrogiannis, C., & Sadeghi, F. (2019). MuSHR: A Low-Cost, Open-Source Robotic Racecar for Education and Research. arXiv preprint arXiv:1908.08031"},{"key":"10006_CR46","unstructured":"Sun, W., Venkatraman, A., Gordon, G.J., Boots, B., & Bagnell, J.A. (2017). Deeply AggreVaTeD: Differentiable Imitation Learning for Sequential Prediction. In: Proceedings of the 34th International Conference on Machine Learning (ICML)."},{"key":"10006_CR47","unstructured":"Zinkevich, M. (2003). Online convex programming and generalized infinitesimal gradient ascent. In: Proceedings of the 20th International Conference on Machine Learning (ICML)."}],"container-title":["Autonomous Robots"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-021-10006-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10514-021-10006-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-021-10006-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,5]],"date-time":"2022-02-05T09:08:51Z","timestamp":1644052131000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10514-021-10006-9"}},"subtitle":["An online framework for robot learning from explicit and implicit human feedback"],"short-title":[],"issued":{"date-parts":[[2021,10,19]]},"references-count":47,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,1]]}},"alternative-id":["10006"],"URL":"https:\/\/doi.org\/10.1007\/s10514-021-10006-9","relation":{},"ISSN":["0929-5593","1573-7527"],"issn-type":[{"value":"0929-5593","type":"print"},{"value":"1573-7527","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,10,19]]},"assertion":[{"value":"31 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 June 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 October 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}