{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,22]],"date-time":"2026-03-22T05:45:37Z","timestamp":1774158337107,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T00:00:00Z","timestamp":1743984000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T00:00:00Z","timestamp":1743984000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s10489-025-06335-2","type":"journal-article","created":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T06:28:34Z","timestamp":1744007314000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["E-GAIL: efficient GAIL through including negative corruption and long-term rewards for robotic manipulations"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5263-836X","authenticated-orcid":false,"given":"Jiayi","family":"Tan","sequence":"first","affiliation":[]},{"given":"Gang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Zeyuan","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Haofeng","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Marcelo","family":"H. 
Ang Jr","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,7]]},"reference":[{"key":"6335_CR1","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller MA, Fidjeland AK, Ostrovski G, Petersen S, Beattie C, Sadik A, Antonoglou I, King H, Kumaran D, Wierstra D, Legg S, Hassabis D (2015) Human-level control through deep reinforcement learning. Nature 518:529\u2013533","journal-title":"Nature"},{"key":"6335_CR2","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M, Dieleman S, Grewe D, Nham J, Kalchbrenner N, Sutskever I, Lillicrap T, Leach M, Kavukcuoglu K, Graepel T, Hassabis D (2016) Mastering the game of go with deep neural networks and tree search. Nature 529:484\u2013503","journal-title":"Nature"},{"key":"6335_CR3","doi-asserted-by":"publisher","unstructured":"Kumar A, Sharma R, Varshney P (2018) Lyapunov fuzzy markov game controller for two link robotic manipulator. J Intell Fuzzy Syst 34:1479\u20131490. https:\/\/doi.org\/10.3233\/JIFS-169443","DOI":"10.3233\/JIFS-169443"},{"key":"6335_CR4","doi-asserted-by":"publisher","unstructured":"Kumar A, Sharma R (2018) Neural\/fuzzy self learning lyapunov control for non linear systems. Int J Inf Technol 14. https:\/\/doi.org\/10.1007\/s41870-017-0074-z","DOI":"10.1007\/s41870-017-0074-z"},{"key":"6335_CR5","doi-asserted-by":"publisher","unstructured":"Kumar A, Sharma R (2018) Linguistic lyapunov reinforcement learning control for robotic manipulators. Neurocomput 272:84\u201395. 
https:\/\/doi.org\/10.1016\/j.neucom.2017.06.064","DOI":"10.1016\/j.neucom.2017.06.064"},{"key":"6335_CR6","doi-asserted-by":"publisher","unstructured":"Kumar A, Sharma R (2017) Fuzzy lyapunov reinforcement learning for non linear systems. ISA Trans 67:151\u2013159. https:\/\/doi.org\/10.1016\/j.isatra.2017.01.026","DOI":"10.1016\/j.isatra.2017.01.026"},{"key":"6335_CR7","unstructured":"Pomerleau DA (1988) Alvinn: An autonomous land vehicle in a neural network. Adv Neural Inf Process Syst 1"},{"key":"6335_CR8","unstructured":"Ng AY, Russell SJ (2000) Algorithms for inverse reinforcement learning. In: Proceedings of the Seventeenth International Conference on Machine Learning. ICML \u201900, pp 663\u2013670. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA"},{"key":"6335_CR9","doi-asserted-by":"crossref","unstructured":"Levine S, Wagener N, Abbeel P (2015) Learning contact-rich manipulation skills with guided policy search. 2015 IEEE Int Conf Robot Automat (ICRA), 156\u2013163","DOI":"10.1109\/ICRA.2015.7138994"},{"key":"6335_CR10","doi-asserted-by":"crossref","unstructured":"Mahler J, Liang J, Niyaz S, Laskey M, Doan R, Liu X, Ojea JA, Goldberg K (2017) Dex-net 2.0: Deep learning to plan robust grasps with synthetic point clouds and analytic grasp metrics. ArXiv arXiv:1703.09312","DOI":"10.15607\/RSS.2017.XIII.058"},{"issue":"24","key":"6335_CR11","doi-asserted-by":"publisher","first-page":"30865","DOI":"10.1007\/s10489-023-05062-w","volume":"53","author":"D Boborzi","year":"2023","unstructured":"Boborzi D, Straehle C-N, Buchner J, Mikelsons L (2023) Imitation learning by state-only distribution matching. Appl Intell 53(24):30865\u201330886. https:\/\/doi.org\/10.1007\/s10489-023-05062-w","journal-title":"Appl Intell"},{"key":"6335_CR12","unstructured":"Gao Y, Xu H, Lin J, Yu F, Levine S, Darrell T (2018) Reinforcement learning from imperfect demonstrations. 
arXiv:1802.05313"},{"key":"6335_CR13","unstructured":"Ross S, Bagnell JA (2014) Reinforcement and imitation learning via interactive no-regret learning. arXiv:1406.5979"},{"key":"6335_CR14","unstructured":"Ho J, Ermon S (2016) Generative adversarial imitation learning. Adv Neural Inf Process Syst 29"},{"key":"6335_CR15","doi-asserted-by":"publisher","first-page":"11166","DOI":"10.1109\/LRA.2022.3196122","volume":"7","author":"H Shen","year":"2022","unstructured":"Shen H, Wan W, Wang H (2022) Learning category-level generalizable object manipulation policy via generative adversarial self-imitation learning from demonstrations. IEEE Robot Automat Lett 7:11166\u201311173","journal-title":"IEEE Robot Automat Lett"},{"key":"6335_CR16","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp 1861\u20131870 . PMLR"},{"key":"6335_CR17","unstructured":"Karras T, Aila T, Laine S, Lehtinen J (2018) Progressive growing of GANs for improved quality, stability, and variation. In: International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=Hk99zCeAb"},{"key":"6335_CR18","unstructured":"Pshikhachev G, Ivanov D, Egorov V, Shpilman A (2021) Self-imitation learning from demonstrations. In: Deep RL Workshop NeurIPS 2021 . https:\/\/openreview.net\/forum?id=fYcViVwEH3U"},{"key":"6335_CR19","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. CoRR arXiv:1707.06347"},{"key":"6335_CR20","unstructured":"Torabi F, Warnell G, Stone P (2018) Generative adversarial imitation from observation. ArXiv arXiv:1807.06158"},{"key":"6335_CR21","unstructured":"Ji T, Luo Y, Sun F, Zhan X, Zhang J, Xu H (2024) Seizing serendipity: Exploiting the value of past success in off-policy actor-critic. 
In: Forty-first International Conference on Machine Learning"},{"key":"6335_CR22","unstructured":"Hansen N, Su H, Wang X (2024) TD-MPC2: Scalable, Robust World Models for Continuous Control"},{"key":"6335_CR23","unstructured":"Ji T, Liang Y, Zeng Y, Luo Y, Xu G, Guo J, Zheng R, Huang F, Sun F, Xu H (2024) Ace: Off-policy actor-critic with causality-aware entropy regularization. In: Forty-first International Conference on Machine Learning"},{"issue":"8","key":"6335_CR24","doi-asserted-by":"publisher","first-page":"5976","DOI":"10.1007\/s10489-024-05464-4","volume":"54","author":"K Liu","year":"2024","unstructured":"Liu K, Wu L, Zhang Z, Hu X, Lu N, Wei X (2024) CAAC: An effective reinforcement learning algorithm for sparse reward in automatic control systems. Appl Intell 54(8):5976\u20135995","journal-title":"Appl Intell"},{"key":"6335_CR25","unstructured":"Schulman J, Levine S, Abbeel P, Jordan M, Moritz P (2015) Trust region policy optimization. In: Bach, F., Blei, D. (eds) Proceedings of the 32nd International Conference on Machine Learning. Proceedings of Machine Learning Research, vol 37, pp 1889\u20131897. PMLR, Lille, France . https:\/\/proceedings.mlr.press\/v37\/schulman15.html"},{"key":"6335_CR26","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess NMO, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. CoRR arXiv:1509.02971"},{"key":"6335_CR27","unstructured":"Gulcehre C, Paine TL, Shahriari B, Denil M, Hoffman M, Soyer H, Tanburn R, Kapturowski S, Rabinowitz N, Williams D, Barth-Maron G, Wang Z, Freitas N, Team W (2020) Making efficient use of demonstrations to solve hard exploration problems. In: International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=SygKyeHKDH"},{"key":"6335_CR28","doi-asserted-by":"publisher","unstructured":"Zuo G, Zhao Q, Chen K, Li J, Gong D (2020) Off-policy adversarial imitation learning for robotic tasks with low-quality demonstrations. 
Appl Soft Comput 97:106795 . https:\/\/doi.org\/10.1016\/j.asoc.2020.106795","DOI":"10.1016\/j.asoc.2020.106795"},{"key":"6335_CR29","doi-asserted-by":"publisher","unstructured":"Kilinc O, Montana G (2022) Reinforcement learning for robotic manipulation using simulated locomotion demonstrations. Mach Learn 111:1\u201322. https:\/\/doi.org\/10.1007\/s10994-021-06116-1","DOI":"10.1007\/s10994-021-06116-1"},{"key":"6335_CR30","unstructured":"Xu H, Jiang L, Jianxiong L, Zhan X (2022) A policy-guided imitation approach for offline reinforcement learning. In: Advances in Neural Information Processing Systems, vol 35, pp 4085\u20134098 . https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/1a0755b249b772ed5529796b0a7cc9bd-Paper-Conference.pdf"},{"key":"6335_CR31","doi-asserted-by":"crossref","unstructured":"Strudel R, Pashevich A, Kalevatykh I, Laptev I, Sivic J, Schmid C (2019) Learning to combine primitive skills: A step towards versatile robotic manipulation. 2020 IEEE Int Conf Robot Automat (ICRA), 4637\u20134643","DOI":"10.1109\/ICRA40945.2020.9196619"},{"key":"6335_CR32","unstructured":"Vezhnevets AS, Osindero S, Schaul T, Heess N, Jaderberg M, Silver D, Kavukcuoglu K (2017) Feudal networks for hierarchical reinforcement learning. In: International Conference on Machine Learning, pp 3540\u20133549 . PMLR"},{"key":"6335_CR33","unstructured":"Nachum O, Gu SS, Lee H, Levine S (2018) Data-efficient hierarchical reinforcement learning. Adv Neural Inf Process Syst 31"},{"key":"6335_CR34","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the Twenty-first International Conference on Machine Learning, p 1","DOI":"10.1145\/1015330.1015430"},{"key":"6335_CR35","unstructured":"Das N, Bechtle S, Davchev T, Jayaraman D, Rai A, Meier F (2021) Model-based inverse reinforcement learning from visual demonstrations. In: Conference on Robot Learning, pp 1930\u20131942 . 
PMLR"},{"key":"6335_CR36","doi-asserted-by":"publisher","unstructured":"Zhan A, Zhao R, Pinto L, Abbeel P, Laskin M (2022) Learning visual robotic control efficiently with contrastive pre-training and data augmentation. In: 2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp 4040\u20134047 . https:\/\/doi.org\/10.1109\/IROS47612.2022.9981055","DOI":"10.1109\/IROS47612.2022.9981055"},{"key":"6335_CR37","doi-asserted-by":"crossref","unstructured":"Lin X, So J, Mahalingam S, Liu F, Abbeel P (2024) Spawnnet: Learning generalizable visuomotor skills from pre-trained network. In: 2024 IEEE International Conference on Robotics and Automation (ICRA), vol 33, pp 4781\u20134787","DOI":"10.1109\/ICRA57147.2024.10610356"},{"key":"6335_CR38","unstructured":"Duan Y, Andrychowicz M, Stadie B, Jonathan\u00a0Ho O, Schneider J, Sutskever I, Abbeel P, Zaremba W (2017) One-shot imitation learning. Adv Neural Inf Process Syst 30"},{"key":"6335_CR39","unstructured":"Finn C, Abbeel P, Levine S (2017) Model-agnostic meta-learning for fast adaptation of deep networks. In: International Conference on Machine Learning, pp 1126\u20131135. PMLR"},{"key":"6335_CR40","unstructured":"Guo Y, Oh J, Singh S, Lee H (2018) Generative adversarial self-imitation learning. arXiv:1812.00950"},{"key":"6335_CR41","unstructured":"Florence P, Lynch C, Zeng A, Ramirez OA, Wahid A, Downs L, Wong A, Lee J, Mordatch I, Tompson J (2021) Implicit behavioral cloning. In: 5th Annual Conference on Robot Learning. https:\/\/openreview.net\/forum?id=rif3a5NAxU6"},{"key":"6335_CR42","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"6335_CR43","unstructured":"Kingma DP, Ba J (2014) Adam: A method for stochastic optimization. 
CoRR arXiv:1412.6980"},{"key":"6335_CR44","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser Lu, Polosukhin I (2017) Attention is all you need. In: Guyon I, Luxburg UV, Bengio S, Wallach H, Fergus R, Vishwanathan S, Garnett R (eds) Advances in Neural Information Processing Systems, vol 30 . https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"6335_CR45","unstructured":"Yu T, Quillen D, He Z, Julian R, Hausman K, Finn C, Levine S (2020) Meta-world: A benchmark and evaluation\u00a0for multi-task and meta reinforcement learning. In: Proceedings of the Conference on Robot Learning. Proceedings of Machine Learning Research, vol 100, pp 1094\u20131100. https:\/\/proceedings.mlr.press\/v100\/yu20a.html"}],"updated-by":[{"DOI":"10.1007\/s10489-025-06830-6","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T00:00:00Z","timestamp":1767571200000}}],"container-title":["Applied 
Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06335-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-06335-2","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06335-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T16:18:11Z","timestamp":1767629891000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-06335-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,7]]},"references-count":45,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["6335"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-06335-2","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,7]]},"assertion":[{"value":"30 January 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 April 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2026","order":4,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Correction","order":5,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"A Correction to this paper has been published:","order":6,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article 
History"}},{"value":"https:\/\/doi.org\/10.1007\/s10489-025-06830-6","URL":"https:\/\/doi.org\/10.1007\/s10489-025-06830-6","order":7,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}},{"value":"This paper does not contain studies with human participants or animals. Statement of informed consent is not applicable since there is no human subject in this paper.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical and Informed Consent for Data Used"}}],"article-number":"633"}}