{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T17:51:39Z","timestamp":1740160299003,"version":"3.37.3"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,10,30]],"date-time":"2021-10-30T00:00:00Z","timestamp":1635552000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,10,30]],"date-time":"2021-10-30T00:00:00Z","timestamp":1635552000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"publisher","award":["109-2221-E-029-022"],"award-info":[{"award-number":["109-2221-E-029-022"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2022,5]]},"DOI":"10.1007\/s13042-021-01454-x","type":"journal-article","created":{"date-parts":[[2021,10,30]],"date-time":"2021-10-30T17:02:28Z","timestamp":1635613348000},"page":"1409-1423","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A critical state identification approach to inverse reinforcement learning for autonomous systems"],"prefix":"10.1007","volume":"13","author":[{"given":"Maxwell","family":"Hwang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4432-8801","authenticated-orcid":false,"given":"Wei-Cheng","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu-Jen","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,10,30]]},"reference":[{"key":"1454_CR1","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT press, Cambridge"},{"issue":"4","key":"1454_CR2","doi-asserted-by":"publisher","first-page":"1956","DOI":"10.1109\/TII.2018.2865004","volume":"15","author":"H Shi","year":"2018","unstructured":"Shi H, Sun G, Wang Y, Hwang KS (2018) Adaptive image-based visual servoing with temporary loss of the visual signal. IEEE Trans Industr Inf 15(4):1956\u20131965","journal-title":"IEEE Trans Industr Inf"},{"issue":"5","key":"1454_CR3","doi-asserted-by":"publisher","first-page":"978","DOI":"10.1109\/TCYB.2014.2341582","volume":"45","author":"KS Hwang","year":"2014","unstructured":"Hwang KS, Jiang WC, Chen YJ (2014) Model learning and knowledge sharing for a multiagent system with Dyna-Q learning. IEEE Trans Cybern 45(5):978\u2013990","journal-title":"IEEE Trans Cybern"},{"issue":"1","key":"1454_CR4","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1109\/TII.2016.2617464","volume":"14","author":"H Shi","year":"2016","unstructured":"Shi H, Li X, Hwang KS, Pan W, Xu G (2016) Decoupled visual servoing with fuzzy Q-learning. IEEE Trans Industr Inf 14(1):241\u2013252","journal-title":"IEEE Trans Industr Inf"},{"issue":"4","key":"1454_CR5","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1109\/TAMD.2014.2362682","volume":"6","author":"B Liu","year":"2014","unstructured":"Liu B, Singh S, Lewis RL, Qin S (2014) Optimal rewards for cooperative agents. IEEE Trans Auton Ment Dev 6(4):286\u2013297","journal-title":"IEEE Trans Auton Ment Dev"},{"key":"1454_CR6","doi-asserted-by":"crossref","unstructured":"Abbeel P, Dolgov D, Ng AY, Thrun S (2008) Apprenticeship learning for motion planning with application to parking lot navigation. In: 2008 IEEE\/RSJ international conference on intelligent robots and systems, IEEE (pp 1083\u20131090)","DOI":"10.1109\/IROS.2008.4651222"},{"key":"1454_CR7","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the twenty-first international conference on Machine learning, p 1","DOI":"10.1145\/1015330.1015430"},{"key":"1454_CR8","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2019.2957831","author":"M Hwang","year":"2019","unstructured":"Hwang M, Jiang WC, Chen YJ, Hwang KS, Tseng YC (2019) An efficient unified approach using demonstrations for inverse reinforcement learning. IEEE Trans Cogn Develop Syst. https:\/\/doi.org\/10.1109\/TCDS.2019.2957831","journal-title":"IEEE Trans Cogn Develop Syst"},{"issue":"2","key":"1454_CR9","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1109\/TRO.2015.2405593","volume":"31","author":"B Michini","year":"2015","unstructured":"Michini B, Walsh TJ, Agha-Mohammadi AA, How JP (2015) Bayesian nonparametric reward learning from demonstration. IEEE Trans Rob 31(2):369\u2013386","journal-title":"IEEE Trans Rob"},{"issue":"4","key":"1454_CR10","doi-asserted-by":"publisher","first-page":"793","DOI":"10.1109\/TCYB.2014.2336867","volume":"45","author":"J Choi","year":"2014","unstructured":"Choi J, Kim KE (2014) Hierarchical bayesian inverse reinforcement learning. IEEE Trans Cybern 45(4):793\u2013805","journal-title":"IEEE Trans Cybern"},{"key":"1454_CR11","unstructured":"Daskalakis C, Foster DJ, Golowich N (2021) Independent policy gradient methods for competitive reinforcement learning. arXiv:2101.04233"},{"key":"1454_CR12","unstructured":"Moerland TM, Broekens J, Jonker CM (2020) Model-based reinforcement learning: a survey. arXiv:2006.16712"},{"key":"1454_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TITS.2021.3054625","volume":"2021","author":"BR Kiran","year":"2021","unstructured":"Kiran BR, Sobh I, Talpaert V, Mannion P, Al Sallab AA, Yogamani S, P\u00e9rez P (2021) Deep reinforcement learning for autonomous driving: a survey. IEEE Trans Intell Transp Syst 2021:1","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"1454_CR14","first-page":"2","volume":"2020","author":"A Haydari","year":"2020","unstructured":"Haydari A, Yilmaz Y (2020) Deep reinforcement learning for intelligent transportation systems: a survey. IEEE Trans Intell Transp Syst 2020:2","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"1454_CR15","unstructured":"Levine S, Popovic Z, Koltun V (2010) Feature construction for inverse reinforcement learning. In: NIPS, vol 23, p 1342"},{"issue":"13","key":"1454_CR16","doi-asserted-by":"publisher","first-page":"1608","DOI":"10.1177\/0278364910371999","volume":"29","author":"P Abbeel","year":"2010","unstructured":"Abbeel P, Coates A, Ng AY (2010) Autonomous helicopter aerobatics through apprenticeship learning. Int J Rob Res 29(13):1608\u20131639","journal-title":"Int J Rob Res"},{"key":"1454_CR17","doi-asserted-by":"publisher","first-page":"103500","DOI":"10.1016\/j.artint.2021.103500","volume":"2021","author":"S Arora","year":"2021","unstructured":"Arora S, Doshi P (2021) A survey of inverse reinforcement learning: challenges, methods and progress. Artif Intell 2021:103500","journal-title":"Artif Intell"},{"key":"1454_CR18","doi-asserted-by":"crossref","unstructured":"Tang J, Singh A, Goehausen N, Abbeel P (2010) Parameterized maneuver learning for autonomous helicopter flight. In 2010 IEEE international conference on robotics and automation (pp 1142\u20131148), IEEE","DOI":"10.1109\/ROBOT.2010.5509832"},{"key":"1454_CR19","doi-asserted-by":"crossref","unstructured":"Grollman DH, Billard A (2011) Donut as i do: learning from failed demonstrations. In 2011 IEEE international conference on robotics and automation (pp 3804\u20133809), IEEE","DOI":"10.1109\/ICRA.2011.5979757"},{"key":"1454_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.ins.2020.01.023","volume":"520","author":"T Zhang","year":"2020","unstructured":"Zhang T, Liu Y, Hwang M, Hwang KS, Ma C, Cheng J (2020) An end-to-end inverse reinforcement learning by a boosting approach with relative entropy. Inf Sci 520:1\u201314","journal-title":"Inf Sci"},{"key":"1454_CR21","doi-asserted-by":"crossref","unstructured":"Lopes M, Melo F, Montesano L (2009) Active learning for reward estimation in inverse reinforcement learning. In: Joint European conference on machine learning and knowledge discovery in databases (pp 31\u201346). Springer, Berlin, Heidelberg","DOI":"10.1007\/978-3-642-04174-7_3"},{"key":"1454_CR22","unstructured":"Kolter JZ, Abbeel P, Ng AY (2008) Hierarchical apprenticeship learning with application to quadruped locomotion. In: Advances in neural information processing systems (pp 769\u2013776)"},{"key":"1454_CR23","unstructured":"Ng AY, Russell SJ (2000) Algorithms for inverse reinforcement learning. In: Icml (Vol 1, p2)"},{"key":"1454_CR24","unstructured":"Schapire RE (1999) A brief introduction to boosting. In: Ijcai (Vol 99, pp 1401\u20131406)"},{"issue":"2","key":"1454_CR25","doi-asserted-by":"publisher","first-page":"1387","DOI":"10.1109\/LRA.2019.2895892","volume":"4","author":"M Pflueger","year":"2019","unstructured":"Pflueger M, Agha A, Sukhatme GS (2019) Rover-IRL: inverse reinforcement learning with soft value iteration networks for planetary rover path planning. IEEE Rob Autom Lett 4(2):1387\u20131394","journal-title":"IEEE Rob Autom Lett"},{"key":"1454_CR26","doi-asserted-by":"publisher","first-page":"15392","DOI":"10.1109\/ACCESS.2020.2967642","volume":"8","author":"Y Zeng","year":"2020","unstructured":"Zeng Y, Xu K, Qin L, Yin Q (2020) A semi-Markov decision model with inverse reinforcement learning for recognizing the destination of a maneuvering agent in real time strategy games. IEEE Access 8:15392\u201315409","journal-title":"IEEE Access"},{"issue":"2","key":"1454_CR27","doi-asserted-by":"publisher","first-page":"183","DOI":"10.3844\/jcssp.2013.183.197","volume":"9","author":"D Pelusi","year":"2013","unstructured":"Pelusi D, Mascella R (2013) Optimal control Algorithms for second order Systems. J Comput Sci 9(2):183\u2013197","journal-title":"J Comput Sci"},{"key":"1454_CR28","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1016\/j.ejcon.2020.08.001","volume":"58","author":"RC Roman","year":"2021","unstructured":"Roman RC, Precup RE, Petriu EM (2021) Hybrid data-driven fuzzy active disturbance rejection control for tower crane systems. Eur J Control 58:373\u2013387","journal-title":"Eur J Control"},{"issue":"1","key":"1454_CR29","first-page":"193","volume":"18","author":"A Turnip","year":"2020","unstructured":"Turnip A, Panggabean JH (2020) Hybrid controller design based magneto-rheological damper lookup table for quarter car suspension. Int J Artif Intell 18(1):193\u2013206","journal-title":"Int J Artif Intell"},{"key":"1454_CR30","first-page":"5","volume":"2021","author":"W Xue","year":"2021","unstructured":"Xue W, Kolaric P, Fan J, Lian B, Chai T, Lewis FL (2021) Inverse reinforcement learning in tracking control based on inverse optimal control. IEEE Trans Cybern 2021:5","journal-title":"IEEE Trans Cybern"},{"key":"1454_CR31","unstructured":"Dvijotham K, Todorov E (2010) Inverse optimal control with linearly-solvable MDPs. In: ICML"},{"issue":"20","key":"1454_CR32","doi-asserted-by":"publisher","first-page":"5022","DOI":"10.1364\/AO.52.005022","volume":"52","author":"F Xiang","year":"2013","unstructured":"Xiang F, Wang Z, Yuan X (2013) Dissimilarity sparsity-preserving projections in feature extraction for visual recognition. Appl Opt 52(20):5022\u20135029","journal-title":"Appl Opt"},{"issue":"3","key":"1454_CR33","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1049\/iet-ipr.2017.0327","volume":"12","author":"F Xiang","year":"2018","unstructured":"Xiang F, Jian Z, Liang P, Xueqiang G (2018) Robust image fusion with block sparse representation and online dictionary learning. IET Image Proc 12(3):345\u2013353","journal-title":"IET Image Proc"},{"key":"1454_CR34","doi-asserted-by":"crossref","unstructured":"Dai W, Yang Q, Xue G, Yu Y (2007) Boosting for Transfer Learning. 2007. In: Proceedings of the 24th international conference on machine learning","DOI":"10.1145\/1273496.1273521"},{"key":"1454_CR35","unstructured":"Ziebart BD, Maas AL, Bagnell JA, Dey AK (2008) Maximum entropy inverse reinforcement learning. In: Aaai (vol 8, pp 1433\u20131438)"},{"key":"1454_CR36","doi-asserted-by":"publisher","first-page":"518","DOI":"10.1016\/j.ins.2019.09.066","volume":"512","author":"JL Lin","year":"2020","unstructured":"Lin JL, Hwang KS, Shi H, Pan W (2020) An ensemble method for inverse reinforcement learning. Inf Sci 512:518\u2013532","journal-title":"Inf Sci"},{"issue":"1","key":"1454_CR37","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1007\/s40815-018-0535-y","volume":"21","author":"W Pan","year":"2019","unstructured":"Pan W, Qu R, Hwang KS, Lin HS (2019) An ensemble fuzzy approach for inverse reinforcement learning. Int J Fuzzy Syst 21(1):95\u2013103","journal-title":"Int J Fuzzy Syst"},{"issue":"5","key":"1454_CR38","doi-asserted-by":"publisher","first-page":"1141","DOI":"10.1109\/TSMCA.2012.2227719","volume":"43","author":"A Konar","year":"2013","unstructured":"Konar A, Chakraborty IG, Singh SJ, Jain LC, Nagar AK (2013) A deterministic improved Q-learning for path planning of a mobile robot. IEEE Trans Syst Man Cybern Syst 43(5):1141\u20131153","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"issue":"12","key":"1454_CR39","doi-asserted-by":"publisher","first-page":"1481","DOI":"10.1109\/TSMC.2015.2418321","volume":"45","author":"KS Hwang","year":"2015","unstructured":"Hwang KS, Lin JL, Yeh KH (2015) Learning to adjust and refine gait patterns for a biped robot. IEEE Trans Syst Man Cybern Syst 45(12):1481\u20131490","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"issue":"9","key":"1454_CR40","doi-asserted-by":"publisher","first-page":"1125","DOI":"10.1109\/TSMC.2013.2294155","volume":"44","author":"S Doltsinis","year":"2014","unstructured":"Doltsinis S, Ferreira P, Lohse N (2014) An MDP model-based reinforcement learning approach for production station ramp-up optimization: Q-learning analysis. IEEE Trans Syst Man Cybern Syst 44(9):1125\u20131138","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"issue":"9","key":"1454_CR41","doi-asserted-by":"publisher","first-page":"1470","DOI":"10.1109\/TSMC.2017.2671848","volume":"48","author":"KS Hwang","year":"2017","unstructured":"Hwang KS, Jiang WC, Chen YJ, Hwang I (2017) Model learning for multistep backward prediction in dyna-$${Q}$$ learning. IEEE Trans Syst Man Cybern Syst 48(9):1470\u20131481","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"1454_CR42","first-page":"1","volume":"2020","author":"Z Xie","year":"2020","unstructured":"Xie Z, Zhang Q, Jiang Z, Liu H (2020) Robot learning from demonstration for path planning: a review. Sci China Technol Sci 2020:1\u201310","journal-title":"Sci China Technol Sci"},{"key":"1454_CR43","first-page":"119","volume":"38","author":"R Balian","year":"2004","unstructured":"Balian R (2004) Entropy, a protean concept. Progress Math Phys 38:119","journal-title":"Progress Math Phys"},{"key":"1454_CR44","unstructured":"IRIS (2017) Inverse reinforcement learning based on critical state demo. IRIS Lab. National Sun Yat-sen University, Kaohsiung, Taiwan. [Online]. https:\/\/www.youtube.com\/watch?v=cMaOdoTt4Hw. Accessed 16 Nov 2015"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-021-01454-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-021-01454-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-021-01454-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T03:54:57Z","timestamp":1726026897000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-021-01454-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,30]]},"references-count":44,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2022,5]]}},"alternative-id":["1454"],"URL":"https:\/\/doi.org\/10.1007\/s13042-021-01454-x","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"type":"print","value":"1868-8071"},{"type":"electronic","value":"1868-808X"}],"subject":[],"published":{"date-parts":[[2021,10,30]]},"assertion":[{"value":"15 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Detailed information of all authors\u2019 receives research support is listing as: This study was funded by the Ministry of Science and Technology, Taiwan, under Grant MOST 109-2221-E-029-022-. No other author has reported a potential conflict of interest relevant to this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}