{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,18]],"date-time":"2026-01-18T06:32:09Z","timestamp":1768717929450,"version":"3.49.0"},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"27","license":[{"start":{"date-parts":[[2023,7,10]],"date-time":"2023-07-10T00:00:00Z","timestamp":1688947200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,7,10]],"date-time":"2023-07-10T00:00:00Z","timestamp":1688947200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s00521-023-08785-6","type":"journal-article","created":{"date-parts":[[2023,7,10]],"date-time":"2023-07-10T17:01:43Z","timestamp":1689008503000},"page":"19847-19863","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Expert demonstrations guide reward decomposition for multi-agent cooperation"],"prefix":"10.1007","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2496-7748","authenticated-orcid":false,"given":"Liu","family":"Weiwei","sequence":"first","affiliation":[]},{"given":"Jing","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Liu","family":"Shanqi","sequence":"additional","affiliation":[]},{"given":"Ruan","family":"Yudi","sequence":"additional","affiliation":[]},{"given":"Zhang","family":"Kexin","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Liu","family":"Yong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,10]]},"reference":[{"key":"8785_CR1","unstructured":"Ferber J, Weiss G (1999) Multi-agent systems: an introduction to distributed artificial intelligence vol. 1. Addison-Wesley Reading, ???"},{"issue":"2","key":"8785_CR2","doi-asserted-by":"publisher","first-page":"1125","DOI":"10.1109\/LRA.2019.2894217","volume":"4","author":"W H\u00f6nig","year":"2019","unstructured":"H\u00f6nig W, Kiesel S, Tinka A, Durham JW, Ayanian N (2019) Persistent and robust execution of mapf schedules in warehouses. IEEE Robot Autom Lett 4(2):1125\u20131131","journal-title":"IEEE Robot Autom Lett"},{"issue":"1","key":"8785_CR3","doi-asserted-by":"publisher","first-page":"6","DOI":"10.3390\/machines5010006","volume":"5","author":"SD Pendleton","year":"2017","unstructured":"Pendleton SD, Andersen H, Du X, Shen X, Meghjani M, Eng YH, Rus D, Ang MH (2017) Perception, planning, control, and coordination for autonomous vehicles. Machines 5(1):6","journal-title":"Machines"},{"key":"8785_CR4","unstructured":"Sutton RS (1984) Temporal credit assignment in reinforcement learning. PhD thesis, University of Massachusetts Amherst"},{"key":"8785_CR5","unstructured":"Lowe R, Wu Y, Tamar A, Harb J, Abbeel P, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. Neural Inform Process Syst (NIPS)"},{"key":"8785_CR6","unstructured":"Sunehag P, Lever G, Gruslys A, Czarnecki WM, Zambaldi V, Jaderberg M, Lanctot M, Sonnerat N, Leibo JZ, Tuyls K (2018) Value-decomposition networks for cooperative multi-agent learning based on team reward. In: Proceedings of the 17th international conference on autonomous agents and multiagent systems (AAMAS\u201918), vol. 3, pp. 2085\u20132087. Assoc Computing Machinery"},{"key":"8785_CR7","unstructured":"Rashid T, Samvelyan M, Schroeder C, Farquhar G, Foerster J, Whiteson S (2018) Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning. In: International Conference on Machine Learning, pp. 4295\u20134304. PMLR"},{"key":"8785_CR8","unstructured":"Dou Z, Kuba JG, Yang Y (2022) Understanding value decomposition algorithms in deep cooperative multi-agent reinforcement learning. arXiv preprint arXiv:2202.04868"},{"key":"8785_CR9","doi-asserted-by":"crossref","unstructured":"Gronauer S, Diepold K (2022) Multi-agent deep reinforcement learning: a survey. Artif Intell Rev, 1\u201349","DOI":"10.1007\/s10462-021-09996-w"},{"issue":"4","key":"8785_CR10","doi-asserted-by":"publisher","first-page":"311","DOI":"10.3233\/AIC-2010-0476","volume":"23","author":"Y-M De Hauwere","year":"2010","unstructured":"De Hauwere Y-M, Vrancx P, Now\u00e9 A (2010) Generalized learning automata for multi-agent reinforcement learning. AI Commun 23(4):311\u2013324","journal-title":"AI Commun"},{"key":"8785_CR11","unstructured":"Ng AY, Russell S (2000) Algorithms for inverse reinforcement learning. In: Icml, vol. 1, p. 2"},{"key":"8785_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103500","volume":"297","author":"S Arora","year":"2021","unstructured":"Arora S, Doshi P (2021) A survey of inverse reinforcement learning: challenges, methods and progress. Artif Intell 297:103500","journal-title":"Artif Intell"},{"issue":"6","key":"8785_CR13","first-page":"792","volume":"46","author":"ML Puterman","year":"1995","unstructured":"Puterman ML (1995) Markov decision processes: discrete stochastic dynamic programming. J Oper Res Soc 46(6):792\u2013792","journal-title":"J Oper Res Soc"},{"key":"8785_CR14","doi-asserted-by":"crossref","unstructured":"Tan M (1993) Multi-agent reinforcement learning: independent vs. cooperative agents. In: Proceedings of the 10th international conference on machine learning, pp. 330\u2013337","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"issue":"4","key":"8785_CR15","doi-asserted-by":"publisher","first-page":"0172395","DOI":"10.1371\/journal.pone.0172395","volume":"12","author":"A Tampuu","year":"2017","unstructured":"Tampuu A, Matiisen T, Kodelja D, Kuzovkin I, Korjus K, Aru J, Aru J, Vicente R (2017) Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12(4):0172395","journal-title":"PLoS ONE"},{"key":"8785_CR16","doi-asserted-by":"crossref","unstructured":"Lee KM, Subramanian SG, Crowley M (2021) Investigation of independent reinforcement learning algorithms in multi-agent environments. arXiv preprint arXiv:2111.01100","DOI":"10.3389\/frai.2022.805823"},{"issue":"11","key":"8785_CR17","doi-asserted-by":"publisher","first-page":"4948","DOI":"10.3390\/app11114948","volume":"11","author":"L Canese","year":"2021","unstructured":"Canese L, Cardarilli GC, Di Nunzio L, Fazzolari R, Giardino D, Re M, Span\u00f2 S (2021) Multi-agent reinforcement learning: a review of challenges and applications. Appl Sci 11(11):4948","journal-title":"Appl Sci"},{"key":"8785_CR18","doi-asserted-by":"crossref","unstructured":"Zhang K, Yang Z, Ba\u015far T (2021) Multi-agent reinforcement learning: a selective overview of theories and algorithms. Handbook of Reinforcement Learning and Control, 321\u2013384","DOI":"10.1007\/978-3-030-60990-0_12"},{"issue":"5","key":"8785_CR19","doi-asserted-by":"publisher","first-page":"3215","DOI":"10.1007\/s10462-020-09938-y","volume":"54","author":"W Du","year":"2021","unstructured":"Du W, Ding S (2021) A survey on multi-agent deep reinforcement learning: from the perspective of challenges and applications. Artif Intell Rev 54(5):3215\u20133238","journal-title":"Artif Intell Rev"},{"issue":"746\u2013752","key":"8785_CR20","first-page":"2","volume":"1998","author":"C Claus","year":"1998","unstructured":"Claus C, Boutilier C (1998) The dynamics of reinforcement learning in cooperative multiagent systems. AAAI\/IAAI 1998(746\u2013752):2","journal-title":"AAAI\/IAAI"},{"key":"8785_CR21","unstructured":"Mahajan A, Rashid T, Samvelyan M, Whiteson S (2019) Maven: multi-agent variational exploration. In: Proceedings of the 33rd international conference on neural information processing systems, pp. 7613\u20137624"},{"key":"8785_CR22","unstructured":"Son K, Kim D, Kang WJ, Hostallero DE, Yi Y (2019) Qtran: learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: International conference on machine learning, pp. 5887\u20135896. PMLR"},{"key":"8785_CR23","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: ICLR (Poster)"},{"key":"8785_CR24","doi-asserted-by":"crossref","unstructured":"Foerster J, Farquhar G, Afouras T, Nardelli N, Whiteson S (2018) Counterfactual multi-agent policy gradients. In: Proceedings of the AAAI conference on artificial intelligence, vol. 32","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"8785_CR25","unstructured":"Chung J, Gulcehre C, Cho K, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. In: NIPS 2014 workshop on deep learning, December 2014"},{"key":"8785_CR26","unstructured":"Yu C, Velu A, Vinitsky E, Wang Y, Bayen A, Wu Y (2021) The surprising effectiveness of mappo in cooperative, multi-agent games. arXiv preprint arXiv:2103.01955"},{"key":"8785_CR27","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347"},{"key":"8785_CR28","doi-asserted-by":"crossref","unstructured":"Littman ML (1994) Markov games as a framework for multi-agent reinforcement learning. In: Machine learning proceedings 1994, pp. 157\u2013163. Elsevier, ???","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"8785_CR29","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AW (1996) Reinforcement learning: a survey. J Artif Intell Res 4:237\u2013285","journal-title":"J Artif Intell Res"},{"key":"8785_CR30","unstructured":"Hadfield-Menell D, Russell SJ, Abbeel P, Dragan A (2016) Cooperative inverse reinforcement learning. Adv Neural Inform Process Syst, 29"},{"key":"8785_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.robot.2019.01.003","volume":"114","author":"C You","year":"2019","unstructured":"You C, Lu J, Filev D, Tsiotras P (2019) Advanced planning for autonomous vehicles using reinforcement learning and deep inverse reinforcement learning. Robot Auton Syst 114:1\u201318","journal-title":"Robot Auton Syst"},{"key":"8785_CR32","unstructured":"Wu P, Jia X, Chen L, Yan J, Li H, Qiao Y (2022) Trajectory-guided control prediction for end-to-end autonomous driving: a simple yet strong baseline. arXiv preprint arXiv:2206.08129"},{"key":"8785_CR33","doi-asserted-by":"crossref","unstructured":"Baumberg A (2000) Reliable feature matching across widely separated views. In: Proceedings IEEE conference on computer vision and pattern recognition. CVPR 2000 (Cat. No. PR00662), vol. 1, pp. 774\u2013781. IEEE","DOI":"10.1109\/CVPR.2000.855899"},{"key":"8785_CR34","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the 21st international conference on machine learning, p. 1","DOI":"10.1145\/1015330.1015430"},{"key":"8785_CR35","doi-asserted-by":"crossref","unstructured":"Ratliff ND, Bagnell JA, Zinkevich MA (2006) Maximum margin planning. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 729\u2013736","DOI":"10.1145\/1143844.1143936"},{"key":"8785_CR36","unstructured":"Herman M, Gindele T, Wagner J, Schmitt F, Burgard W (2016) Inverse reinforcement learning with simultaneous estimation of rewards and dynamics. In: Artificial intelligence and statistics, pp. 102\u2013110 . PMLR"},{"key":"8785_CR37","unstructured":"Ziebart BD, Maas AL, Bagnell JA, Dey AK (2008) Maximum entropy inverse reinforcement learning. In: Aaai, vol. 8, pp. 1433\u20131438. Chicago, IL, USA"},{"issue":"1","key":"8785_CR38","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1007\/BF03023004","volume":"7","author":"S Guiasu","year":"1985","unstructured":"Guiasu S, Shenitzer A (1985) The principle of maximum entropy. Math Intell 7(1):42\u201348","journal-title":"Math Intell"},{"key":"8785_CR39","doi-asserted-by":"crossref","unstructured":"Bloem M, Bambos N (2014) Infinite time horizon maximum causal entropy inverse reinforcement learning. In: 53rd IEEE conference on decision and control, pp. 4911\u20134916. IEEE","DOI":"10.1109\/CDC.2014.7040156"},{"key":"8785_CR40","first-page":"4565","volume":"29","author":"J Ho","year":"2016","unstructured":"Ho J, Ermon S (2016) Generative adversarial imitation learning. Adv Neural Inf Process Syst 29:4565\u20134573","journal-title":"Adv Neural Inf Process Syst"},{"issue":"11","key":"8785_CR41","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2020) Generative adversarial networks. Commun ACM 63(11):139\u2013144","journal-title":"Commun ACM"},{"key":"8785_CR42","unstructured":"Song J, Ren H, Sadigh D, Ermon S (2018) Multi-agent generative adversarial imitation learning. arXiv preprint arXiv:1807.09936"},{"key":"8785_CR43","doi-asserted-by":"crossref","unstructured":"Syed U, Bowling M, Schapire RE (2008) Apprenticeship learning using linear programming. In: Proceedings of the 25th international conference on machine learning, pp. 1032\u20131039","DOI":"10.1145\/1390156.1390286"},{"key":"8785_CR44","unstructured":"Samvelyan M, Rashid T, de Witt CS, Farquhar G, Nardelli N, Rudner TGJ, Hung C-M, Torr PHS, Foerster J, Whiteson S (2019) The StarCraft multi-agent challenge. CoRR abs\/1902.04043"},{"key":"8785_CR45","doi-asserted-by":"crossref","unstructured":"Liu S, Wen L, Cui J, Yang X, Cao J, Liu Y (2020) Moving forward in formation: a decentralized hierarchical learning approach to multi-agent moving together. arXiv preprint arXiv:2011.02373","DOI":"10.1109\/IROS51168.2021.9636224"},{"key":"8785_CR46","doi-asserted-by":"crossref","unstructured":"Codevilla F, Santana E, L\u00f3pez AM, Gaidon A (2019) Exploring the limitations of behavior cloning for autonomous driving. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 9329\u20139338","DOI":"10.1109\/ICCV.2019.00942"},{"key":"8785_CR47","unstructured":"de Witt CS, Gupta T, Makoviichuk D, Makoviychuk V, Torr PH, Sun M, Whiteson S (2020) Is independent learning all you need in the starcraft multi-agent challenge? arXiv preprint arXiv:2011.09533"},{"key":"8785_CR48","unstructured":"Wang T, Gupta T, Peng B, Mahajan A, Whiteson S, Zhang C (2021) Rode: learning roles to decompose multi- agent tasks. In: Proceedings of the international conference on learning representations . OpenReview"},{"key":"8785_CR49","doi-asserted-by":"crossref","unstructured":"Barer M, Sharon G, Stern R, Felner A (2014) Suboptimal variants of the conflict-based search algorithm for the multi-agent pathfinding problem. In: Seventh annual symposium on combinatorial search","DOI":"10.1016\/j.artint.2014.11.006"},{"key":"8785_CR50","unstructured":"Van\u00a0der Maaten L, Hinton G (2008) Visualizing data using t-sne. J Mach Learn Res 9(11)"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-08785-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-023-08785-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-08785-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T22:37:55Z","timestamp":1729723075000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-023-08785-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,10]]},"references-count":50,"journal-issue":{"issue":"27","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["8785"],"URL":"https:\/\/doi.org\/10.1007\/s00521-023-08785-6","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,7,10]]},"assertion":[{"value":"7 December 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 June 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 July 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}