{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T00:57:15Z","timestamp":1778893035461,"version":"3.51.4"},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T00:00:00Z","timestamp":1655683200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T00:00:00Z","timestamp":1655683200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Evol. Intel."],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1007\/s12065-022-00703-4","type":"journal-article","created":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T02:02:41Z","timestamp":1655690561000},"page":"267-273","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Centralized reinforcement learning for multi-agent cooperative environments"],"prefix":"10.1007","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9091-7952","authenticated-orcid":false,"given":"Chengxuan","family":"Lu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qihao","family":"Bao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaojie","family":"Xia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chongxiao","family":"Qu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,20]]},"reference":[{"issue":"7540","key":"703_CR1","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"issue":"7782","key":"703_CR2","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM et al (2019) Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575(7782):350\u2013354","journal-title":"Nature"},{"issue":"1","key":"703_CR3","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine S, Finn C, Darrell T et al (2016) End-to-end training of deep visuomotor policies. J Mach Learn Res 17(1):1334\u20131373","journal-title":"J Mach Learn Res"},{"key":"703_CR4","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1613\/jair.2447","volume":"32","author":"FA Oliehoek","year":"2008","unstructured":"Oliehoek FA, Spaan MTJ, Vlassis N (2008) Optimal and approximate Q-value functions for decentralized POMDPs. J Artif Intell Res 32:289\u2013353","journal-title":"J Artif Intell Res"},{"key":"703_CR5","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.neucom.2016.01.031","volume":"190","author":"L Kraemer","year":"2016","unstructured":"Kraemer L, Banerjee B (2016) Multi-agent reinforcement learning as a rehearsal for decentralized planning. Neurocomputing 190:82\u201394","journal-title":"Neurocomputing"},{"key":"703_CR6","doi-asserted-by":"crossref","unstructured":"Tavakoli A, Pardo F, Kormushev P 2018 Action branching architectures for deep reinforcement learning. In: Proceedings of the 32nd AAAI conference on artificial intelligence (AAAI 2018)","DOI":"10.1609\/aaai.v32i1.11798"},{"key":"703_CR7","doi-asserted-by":"crossref","unstructured":"Tan M (1993) Multi-agent reinforcement learning: independent vs. cooperative agents. In: Proceedings of the tenth international conference on machine learning, pp 330\u2013337","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"703_CR8","unstructured":"Vaswani A, Shazeer N, Parmar N, et al (2017) Attention is all you need. In: Advances in neural information processing systems, pp 5998\u20136008"},{"key":"703_CR9","unstructured":"Devlin J, Chang M W, Lee K, et al (2018) Bert: Pre-training of deep bidirectional transformers for language understanding. https:\/\/arxiv.org\/abs\/1810.04805"},{"key":"703_CR10","unstructured":"Brown T B, Mann B, Ryder N, et al (2020) Language models are few-shot learners. https:\/\/arxiv.org\/abs\/2005.14165"},{"key":"703_CR11","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, et al (2020) An image is worth 16x16 words: Transformers for image recognition at scale. https:\/\/arxiv.org\/abs\/2010.11929"},{"issue":"1","key":"703_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3158369","volume":"52","author":"S Zhang","year":"2019","unstructured":"Zhang S, Yao L, Sun A et al (2019) Deep learning based recommender system: a survey and new perspectives. ACM Comput Surv (CSUR) 52(1):1\u201338","journal-title":"ACM Comput Surv (CSUR)"},{"key":"703_CR13","unstructured":"Bahdanau D, Cho K, Bengio Y (2015) Neural machine translation by jointly learning to align and translate. In: 3rd International conference on learning representations, ICLR 2015"},{"key":"703_CR14","unstructured":"Sunehag P, Lever G, Gruslys A, et al Value-decomposition networks for cooperative multi-agent learning based on team reward. In: AAMAS. 2018: 2085\u20132087"},{"key":"703_CR15","unstructured":"Lowe R, Wu Y I, Tamar A, et al (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in neural information processing systems, pp 6379\u20136390"},{"key":"703_CR16","unstructured":"illicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: International conference on learning representations"},{"key":"703_CR17","unstructured":"Iqbal S, Sha F (2019) Actor-attention-critic for multi-agent reinforcement learning. In: International conference on machine learning. PMLR, pp 2961\u20132970"},{"key":"703_CR18","unstructured":"Jiang J, Lu Z (2018) Learning attentional communication for multi-agent cooperation. In: Advances in neural information processing systems, pp 7254\u20137264"},{"key":"703_CR19","unstructured":"Khan A, Zhang C, Lee D D, et al (2018) Scalable centralized deep multi-agent reinforcement learning via policy gradients. https:\/\/arxiv.org\/abs\/1805.08776"},{"key":"703_CR20","unstructured":"Sutton R S, McAllester D A, Singh S P, et al (2000) Policy gradient methods for reinforcement learning with function approximation. In: Advances in neural information processing systems, pp 1057\u20131063"},{"key":"703_CR21","unstructured":"Schulman J, Levine S, Abbeel P, et al (2015) Trust region policy optimization. In: International conference on machine learning, pp 1889\u20131897"},{"key":"703_CR22","unstructured":"Schulman J, Wolski F, Dhariwal P, et al (2017) Proximal policy optimization algorithms. https:\/\/arxiv.org\/abs\/1707.06347"},{"key":"703_CR23","unstructured":"Schulman J, Moritz P, Levine S, et al (2015) High-dimensional continuous control using generalized advantage estimation. https:\/\/arxiv.org\/abs\/1506.02438"},{"issue":"8","key":"703_CR24","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"703_CR25","doi-asserted-by":"crossref","unstructured":"Tang Y, Agrawal S (2020) Discretizing continuous action space for on-policy optimization. In: Proceedings of the AAAI conference on artificial intelligence, vol 34, no (04), pp 5981\u20135988","DOI":"10.1609\/aaai.v34i04.6059"},{"key":"703_CR26","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT press, Cambridge"}],"container-title":["Evolutionary Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12065-022-00703-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12065-022-00703-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12065-022-00703-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,19]],"date-time":"2024-02-19T19:22:42Z","timestamp":1708370562000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12065-022-00703-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,20]]},"references-count":26,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,2]]}},"alternative-id":["703"],"URL":"https:\/\/doi.org\/10.1007\/s12065-022-00703-4","relation":{},"ISSN":["1864-5909","1864-5917"],"issn-type":[{"value":"1864-5909","type":"print"},{"value":"1864-5917","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,20]]},"assertion":[{"value":"9 May 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 February 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}