{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T21:44:41Z","timestamp":1769550281574,"version":"3.49.0"},"reference-count":24,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,10,17]],"date-time":"2023-10-17T00:00:00Z","timestamp":1697500800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,17]],"date-time":"2023-10-17T00:00:00Z","timestamp":1697500800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Life Robotics"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s10015-023-00912-9","type":"journal-article","created":{"date-parts":[[2023,10,17]],"date-time":"2023-10-17T06:02:45Z","timestamp":1697522565000},"page":"703-709","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Developing multi-agent adversarial environment using reinforcement learning and imitation learning"],"prefix":"10.1007","volume":"28","author":[{"given":"Ziyao","family":"Han","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yupeng","family":"Liang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kazuhiro","family":"Ohkura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,10,17]]},"reference":[{"key":"912_CR1","doi-asserted-by":"crossref","unstructured":"Balaji et al (2010) \u201cAn introduction to multi-agent systems.\u201d Innovations in multi-agent systems and applications-1. Springer, Berlin, Heidelberg. 1\u201327","DOI":"10.1007\/978-3-642-14435-6_1"},{"key":"912_CR2","unstructured":"Jin et al (2003) \u201cTowards the applications of multi-agent techniques in intelligent transportation systems.\u201d Proceedings of the 2003 IEEE International Conference on Intelligent Transportation Systems. Vol. 2. IEEE"},{"issue":"2","key":"912_CR3","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1017\/S0269888905000494","volume":"20","author":"Serugendo","year":"2005","unstructured":"Serugendo et al (2005) Self-organization in multi-agent systems. Knowl Eng Rev 20(2):165\u2013189","journal-title":"Knowl Eng Rev"},{"issue":"2","key":"912_CR4","first-page":"229","volume":"17","author":"Sutton","year":"1999","unstructured":"Sutton et al (1999) Reinforcement learning: an introduction. Robotica 17(2):229\u2013235","journal-title":"Robotica"},{"issue":"1","key":"912_CR5","first-page":"1334","volume":"17","author":"Sergey Levine","year":"2016","unstructured":"Levine Sergey et al (2016) End-to-end training of deep visuomotor policies. J Mach Learn Res 17(1):1334\u20131373","journal-title":"J Mach Learn Res"},{"key":"912_CR6","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"Bellemare","year":"2013","unstructured":"Bellemare et al (2013) The arcade learning environment: an evaluation platform for general agents. J Artif Intell Res 47:253\u2013279","journal-title":"J Artif Intell Res"},{"key":"912_CR7","unstructured":"Steven W (1992) \u201cReinforcement learning for the adaptive control of perception and action\u201d"},{"key":"912_CR8","first-page":"182","volume":"47","author":"W Yang","year":"2020","unstructured":"Yang W et al (2020) Survey on sparse reward in deep reinforcement learning. Comput Sci 47:182\u2013191","journal-title":"Comput Sci"},{"key":"912_CR9","unstructured":"Vecerik M et al (2017) \u201cLeveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards.\u201d arXiv preprint arXiv:1707.08817"},{"key":"912_CR10","unstructured":"Wan L et al (2019) \u201cSurvey on deep reinforcement learning theory and its application.\u201d 32, 67-81. Pattem. Recognit. Aitificial Intell"},{"issue":"54","key":"912_CR11","first-page":"1","volume":"20","author":"H\u00fcttenrauch","year":"2019","unstructured":"H\u00fcttenrauch et al (2019) Deep reinforcement learning for swarm systems. J Mach Learn Res 20(54):1\u201331","journal-title":"J Mach Learn Res"},{"issue":"7676","key":"912_CR12","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D et al (2017) Mastering the game of go without human knowledge. Nature 550(7676):354\u2013359","journal-title":"Nature"},{"key":"912_CR13","unstructured":"Oriol V et al (2017) \u201cStarcraft ii: A new challenge for reinforcement learning.\u201d arXiv:1708.04782"},{"issue":"7540","key":"912_CR14","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"912_CR15","unstructured":"Tuomas H et al (2018) \u201cSoft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor.\u201d International conference on machine learning. PMLR"},{"key":"912_CR16","unstructured":"John S et al (2017) \u201cProximal policy optimization algorithms.\u201d arXiv preprint arXiv:1707.06347"},{"key":"912_CR17","doi-asserted-by":"publisher","first-page":"750","DOI":"10.1016\/j.ins.2022.07.111","volume":"609","author":"J Zhang","year":"2022","unstructured":"Zhang J et al (2022) Proximal policy optimization via enhanced exploration efficiency. Inform Sci 609:750\u2013765","journal-title":"Inform Sci"},{"issue":"4","key":"912_CR18","doi-asserted-by":"publisher","first-page":"1278","DOI":"10.3390\/s21041278","volume":"21","author":"Jiang Hua","year":"2021","unstructured":"Hua Jiang et al (2021) Learning for a robot: deep reinforcement learning, imitation learning, transfer learning. Sensors 21(4):1278","journal-title":"Sensors"},{"key":"912_CR19","unstructured":"Pierre S et al (2016) \u201cUnsupervised perceptual rewards for imitation learning.\u201d arXiv preprint arXiv:1612.06699"},{"key":"912_CR20","unstructured":"Ng Andrew Y, Russell S (2000) \u201cAlgorithms for inverse reinforcement learning.\u201d Icml. Vol. 1"},{"issue":"1","key":"912_CR21","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1162\/neco.1991.3.1.88","volume":"3","author":"DA Pomerleau","year":"1991","unstructured":"Pomerleau DA (1991) Efficient training of artificial neural networks for autonomous navigation. Neural Comput 3(1):88\u201397","journal-title":"Neural Comput"},{"key":"912_CR22","unstructured":"Vikash K et al (2016) \u201cLearning dexterous manipulation policies from experience and imitation.\u201d arXiv preprint arXiv:1611.05095"},{"key":"912_CR23","unstructured":"Ross St\u00e9phane, Drew Bagnell (2010) \u201cEfficient reductions for imitation learning.\u201d Proceedings of the thirteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings"},{"key":"912_CR24","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1016\/j.compedu.2016.03.017","volume":"98","author":"Radek Pel\u00e1nek","year":"2016","unstructured":"Pel\u00e1nek Radek (2016) Applications of the Elo rating system in adaptive educational systems. Comput Educ 98:169\u2013179","journal-title":"Comput Educ"}],"container-title":["Artificial Life and Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-023-00912-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10015-023-00912-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-023-00912-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T08:09:02Z","timestamp":1699085342000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10015-023-00912-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,17]]},"references-count":24,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["912"],"URL":"https:\/\/doi.org\/10.1007\/s10015-023-00912-9","relation":{},"ISSN":["1433-5298","1614-7456"],"issn-type":[{"value":"1433-5298","type":"print"},{"value":"1614-7456","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,17]]},"assertion":[{"value":"16 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 September 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}