{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T02:54:42Z","timestamp":1720407282263},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,10,30]],"date-time":"2021-10-30T00:00:00Z","timestamp":1635552000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,10,30]],"date-time":"2021-10-30T00:00:00Z","timestamp":1635552000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11704-021-0147-9","type":"journal-article","created":{"date-parts":[[2021,10,30]],"date-time":"2021-10-30T11:02:42Z","timestamp":1635591762000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["LIDAR: learning from imperfect demonstrations with advantage rectification"],"prefix":"10.1007","volume":"16","author":[{"given":"Xiaoqin","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Huimin","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Xiong","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Yuan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,30]]},"reference":[{"issue":"7540","key":"147_CR1","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu A A, Veness J, Bellemare M G, Graves A, Riedmiller M, Fidjeland A K, Ostrovski G, Petersen S, Beattie C, Sadik A, Antonoglou I, King H, Kumaran D, Wierstra D, Legg S, Hassabis D. Human-level control through deep reinforcement learning. Nature, 2015, 518(7540): 529\u2013533","journal-title":"Nature"},{"issue":"7587","key":"147_CR2","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison C J, Guez A, Sifre L, Van Den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M, Dieleman S, Grewe D, Nham J, Kalchbrenner N, Sutskever I, Lillicrap T, Leach M, Kavukcuoglu K, Graepel T, Hassabis D. Mastering the game of Go with deep neural networks and tree search. Nature, 2016, 529(7587): 484\u2013489","journal-title":"Nature"},{"key":"147_CR3","unstructured":"Mnih V, Badia A P, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K. Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33rd International Conference on Machine Learning. 2016, 1928\u20131937"},{"key":"147_CR4","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O. Proximal policy optimization algorithms. 2017, arXiv preprint arXiv:1707.06347"},{"key":"147_CR5","unstructured":"Fujimoto S, Hoof H, Meger D. Addressing function approximation error in actor-critic methods. In: Proceedings of the 35th International Conference on Machine Learning. 2018, 1582\u20131591"},{"key":"147_CR6","unstructured":"Lakshminarayanan A S, Ozair S, Bengio, Y. Reinforcement learning with few expert demonstrations. In: Proceedings of Neural Information Processing Systems Workshop on Deep Learning for Action and Interaction. 2016"},{"key":"147_CR7","doi-asserted-by":"crossref","unstructured":"Rajeswaran A, Kumar V, Gupta A, Vezzani G, Schulman J, Todorov E, Levine S. Learning complex dexterous manipulation with deep reinforcement learning and demonstrations. 2017, arXiv preprint arXiv:1709.10087","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"147_CR8","doi-asserted-by":"crossref","unstructured":"Nair A, McGrew B, Andrychowicz M, Zaremba W, Abbeel P. Overcoming exploration in reinforcement learning with demonstrations. In: Proceedings of 2018 IEEE International Conference on Robotics and Automation. 2018, 6292\u20136299","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"147_CR9","unstructured":"Ebrahimi S, Rohrbach A, Darrell T. Gradient-free policy architecture search and adaptation. In: Proceedings of the Conference on Robot Learning. 2017, 505\u2013514"},{"key":"147_CR10","unstructured":"Reddy S, Dragan A D, Levine, S. SQIL: imitation learning via regularized behavioral cloning. 2019, arXiv preprint arXiv:1905.11108"},{"key":"147_CR11","unstructured":"Ho J, Ermon S. Generative adversarial imitation learning. In: Proceedings of the 30th International Conference on Neural Information Processing Systems. 2016, 4565\u20134573"},{"key":"147_CR12","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y. Mujoco: a physics engine for model-based control. In: Proceedings of 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems. 2012, 5026\u20135033","DOI":"10.1109\/IROS.2012.6386109"},{"key":"147_CR13","unstructured":"Kang B Y, Jie Z Q, Feng J S. Policy optimization with demonstrations. In: Proceedings of International Conference on Machine Learning. 2018, 2469\u20132478"},{"key":"147_CR14","unstructured":"Lillicrap T P, Hunt J J, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D. Continuous control with deep reinforcement learning. In: Proceedings of the 4th International Conference on Learning Representations. 2016"},{"key":"147_CR15","unstructured":"Haarnoja T, Zhou A, Hartikainen K, Tucker G, Ha S, Tan J, Kumar V, Zhu H, Gupta A, Abbeel P, Levine S. Soft actor-critic algorithms and applications. 2018, arXiv preprint arXiv:1812.05905"},{"key":"147_CR16","unstructured":"Ng A Y, Harada D, Russell S. Policy invariance under reward transformations: theory and application to reward shaping. In: Proceedings of the 16th International Conference on Machine Learning. 1999, 278\u2013287"},{"key":"147_CR17","unstructured":"Brys T, Harutyunyan A, Suay H B, Chernova S, Taylor M E, Now\u00e9 A. Reinforcement learning from demonstration through shaping. In: Proceedings of the 32nd International Joint Conferences on Artificial Intelligence. 2015, 3352\u20133358"},{"key":"147_CR18","doi-asserted-by":"crossref","unstructured":"Jing M X, Ma X J, Huang W B, Sun F C, Yang C, Fang B, Liu H P. Reinforcement learning from imperfect demonstrations under soft expert guidance. In: Proceedings of the 33rd AAAI Conference on Artificial Intelligence. 2020, 5109\u20135116","DOI":"10.1609\/aaai.v34i04.5953"},{"key":"147_CR19","unstructured":"Schulman J, Levine S, Abbeel P, Jordan M I, Moritz P. Trust region policy optimization. In: Proceedings of International Conference on Machine Learning. 2015, 1889\u20131897"},{"key":"147_CR20","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng A Y. Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the 21st International Conference on Machine Learning. 2004","DOI":"10.1145\/1015330.1015430"},{"key":"147_CR21","unstructured":"Ng A Y, Russell S J. Algorithms for inverse reinforcement learning. In: Proceedings of the 17th International Conference on Machine Learning. 2000, 663\u2013670"},{"key":"147_CR22","unstructured":"Li Y Z, Song J M, Ermon S. InfoGAIL: interpretable imitation learning from visual demonstrations. In: Proceedings of the 31st International Conference on Neural Information Processing Systems. 2017, 3815\u20133825"},{"key":"147_CR23","unstructured":"Shiarlis K, Messias J, Whiteson S. Inverse reinforcement learning from failure. In: Proceedings of the 2016 International Conference on Autonomous Agents & Multiagent Systems. 2016, 1060\u20131068"},{"key":"147_CR24","unstructured":"Brown D S, Goo W, Nagarajan P, Niekum S. Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations. In: Proceedings of the International Conference on Machine Learning. 2019, 783\u2013792"},{"key":"147_CR25","unstructured":"Nagarajan P. Inverse reinforcement learning via ranked and failed demonstrations. 2016"},{"key":"147_CR26","unstructured":"Oh J, Guo Y J, Singh S, Lee H. Self-imitation learning. In: Proceedings of the 35th International Conference on Machine Learning. 2018, 3878\u20133887"},{"key":"147_CR27","unstructured":"Wu Y H, Charoenphakdee N, Bao H, Tangkaratt V, Sugiyama M. Imitation learning from imperfect demonstration. In: Proceedings of International Conference on Machine Learning. 2019, 6818\u20136827"},{"key":"147_CR28","unstructured":"Sun W, Bagnell J A, Boots B. Truncated horizon policy search: combining reinforcement learning & imitation learning. In: Proceedings of the 7th International Conference on Learning Representations. 2018"},{"key":"147_CR29","unstructured":"Vecer\u00edk M, Hester T, Scholz J, Wang F M, Pietquin O, Piot B, Heess N, Roth\u00f6rl T, Lampe T, Riedmiller M A. Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards. 2017, arXiv preprint arXiv:1707.08817"},{"key":"147_CR30","unstructured":"Gao Y, Xu H, Lin J, Yu F, Levine S, Darrell T. Reinforcement learning from imperfect demonstrations. 2018, arXiv preprint arXiv:1802.05313"},{"key":"147_CR31","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller M. Deterministic policy gradient algorithms. In: Proceedings of the 31st International Conference on Machine Learning. 2014, 387\u2013395"},{"key":"147_CR32","unstructured":"Sutton R S, McAllester D A, Singh S, Mansour Y. Policy gradient methods for reinforcement learning with function approximation. In: Proceedings of the 12th International Conference on Neural Information Processing Systems. 1999, 1057\u20131063"},{"key":"147_CR33","unstructured":"Munos R, Stepleton T, Harutyunyan A, Bellemare M. Safe and efficient off-policy reinforcement learning. In: Proceedings of the 30th International Conference on Neural Information Processing Systems. 2016, 1054\u20131062"},{"key":"147_CR34","unstructured":"Kakade S, Langford J. Approximately optimal approximate reinforcement learning. In: Proceedings of the 19th International Conference on Machine Learning. 2002, 267\u2013274"},{"key":"147_CR35","unstructured":"Hasselt H. Double Q-learning. In: Proceedings of the 23rd International Conference on Neural Information Processing Systems. 2010, 2613\u20132621"},{"key":"147_CR36","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W. Openai gym. 2016, arXiv preprint arXiv:1606.01540"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-021-0147-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-021-0147-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-021-0147-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,22]],"date-time":"2023-03-22T22:14:04Z","timestamp":1679523244000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-021-0147-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,30]]},"references-count":36,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["147"],"URL":"https:\/\/doi.org\/10.1007\/s11704-021-0147-9","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,10,30]]},"assertion":[{"value":"10 April 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 February 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"161312"}}