{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T02:31:05Z","timestamp":1780626665646,"version":"3.54.1"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,3,25]],"date-time":"2020-03-25T00:00:00Z","timestamp":1585094400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,3,25]],"date-time":"2020-03-25T00:00:00Z","timestamp":1585094400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["No.61572044"],"award-info":[{"award-number":["No.61572044"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["No. 61872397"],"award-info":[{"award-number":["No. 61872397"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2020,4]]},"DOI":"10.1007\/s10458-020-09455-w","type":"journal-article","created":{"date-parts":[[2020,3,25]],"date-time":"2020-03-25T10:02:35Z","timestamp":1585130555000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":37,"title":["Learning multi-agent communication with double attentional deep reinforcement learning"],"prefix":"10.1007","volume":"34","author":[{"given":"Hangyu","family":"Mao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhengchao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhen","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhibo","family":"Gong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yan","family":"Ni","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,3,25]]},"reference":[{"key":"9455_CR1","volume-title":"Introduction to reinforcement learning","author":"RS Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Introduction to reinforcement learning (Vol. 135). Cambridge: MIT Press."},{"key":"9455_CR2","doi-asserted-by":"crossref","unstructured":"Tan, M. (1993). Multi-agent reinforcement learning: Independent versus cooperative agents. In Proceedings of the tenth international conference on machine learning (pp. 330\u2013337).","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"issue":"2","key":"9455_CR3","doi-asserted-by":"publisher","first-page":"487","DOI":"10.1016\/j.artint.2010.09.008","volume":"175","author":"F Wu","year":"2011","unstructured":"Wu, F., Zilberstein, S., & Chen, X. (2011). Online planning for multi-agent systems with bounded communication. Artificial Intelligence, 175(2), 487\u2013511.","journal-title":"Artificial Intelligence"},{"key":"9455_CR4","unstructured":"Zhang, C., & Lesser, V. (2013). Coordinating multi-agent reinforcement learning with limited communication. In Proceedings of the 2013 international conference on Autonomous agents and multi-agent systems, international foundation for autonomous agents and multiagent systems (pp. 1101\u20131108)."},{"key":"9455_CR5","doi-asserted-by":"crossref","unstructured":"Roth, M., Simmons, R., & Veloso, M. (2005). Reasoning about joint beliefs for execution-time communication decisions. In Proceedings of the fourth international joint conference on autonomous agents and multiagent systems, ACM (pp. 786\u2013793).","DOI":"10.1145\/1082473.1082593"},{"key":"9455_CR6","unstructured":"Roth, M., Simmons, R., & Veloso, M. (2006). What to communicate? Execution-time decision in multi-agent pomdps. In Distributed autonomous robotic systems (Vol. 7, pp. 177\u2013186). Berlin: Springer."},{"key":"9455_CR7","unstructured":"Sukhbaatar, S., Fergus, R., et al. (2016). Learning multiagent communication with backpropagation. In Advances in neural information processing systems (pp. 2244\u20132252)."},{"key":"9455_CR8","unstructured":"Foerster, J., Assael, Y. M., de Freitas, N., & Whiteson, S. (2016). Learning to communicate with deep multi-agent reinforcement learning. In Advances in neural information processing systems (pp 2137\u20132145)."},{"key":"9455_CR9","unstructured":"Peng, P., Yuan, Q., Wen, Y., Yang, Y., Tang, Z., Long, H., & Wang, J. (2017). Multiagent bidirectionally-coordinated nets for learning to play starcraft combat games. arXiv preprint arXiv:170310069."},{"key":"9455_CR10","unstructured":"Mao, H., Gong, Z., Ni, Y., & Xiao, Z. (2017). Accnet: Actor-coordinator-critic net for \u201clearning-to-communicate\u201d with deep multi-agent reinforcement learning. arXiv preprint arXiv:170603235."},{"key":"9455_CR11","unstructured":"Kong, X., Xin, B., Liu, F., & Wang, Y. (2017). Revisiting the master-slave architecture in multi-agent deep reinforcement learning. arXiv preprint arXiv:171207305."},{"key":"9455_CR12","unstructured":"Kilinc, O., & Montana, G. (2019). Multi-agent deep reinforcement learning with extremely noisy observations. In International conference on learning representations."},{"key":"9455_CR13","unstructured":"Kim, D., Moon, S., Hostallero, D., Kang, W. J., Lee, T., Son, K., & Yi, Y. (2019). Learning to schedule communication in multi-agent reinforcement learning. In International conference on learning representations. https:\/\/openreview.net\/forum?id=SJxu5iR9KQ."},{"key":"9455_CR14","unstructured":"Singh, A., Jain, T., & Sukhbaatar, S. (2019). Individualized controlled continuous communication model for multiagent cooperative and competitive tasks. In International conference on learning representations. https:\/\/openreview.net\/forum?id=rye7knCqK7."},{"key":"9455_CR15","unstructured":"Kim, W., Cho, M., & Sung, Y. (2019). Message-dropout: An efficient training method for multi-agent deep reinforcement learning. arXiv preprint arXiv:190206527."},{"key":"9455_CR16","unstructured":"Mao, H., Gong, Z., Zhang, Z., Xiao, Z., & Ni, Y. (2019). Learning multi-agent communication under limited-bandwidth restriction for internet packet routing. arXiv preprint arXiv:190305561."},{"key":"9455_CR17","doi-asserted-by":"crossref","unstructured":"Mao, H., Zhang, Z., Xiao, Z., Gong, Z., & Ni, Y. (2020). Learning agent communication under limited bandwidth by message pruning. In AAAI 2020.","DOI":"10.1609\/aaai.v34i04.5957"},{"key":"9455_CR18","unstructured":"Mao, H., Zhang, Z., Xiao, Z., & Gong, Z. (2019). Modelling the dynamic joint policy of teammates with attention multi-agent DDPG. In Proceedings of the 18th international joint conference on autonomous agents and multiagent systems, ACM."},{"issue":"4","key":"9455_CR19","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"DS Bernstein","year":"2002","unstructured":"Bernstein, D. S., Givan, R., Immerman, N., & Zilberstein, S. (2002). The complexity of decentralized control of MDP. Mathematics of Operations Research, 27(4), 819\u2013840.","journal-title":"Mathematics of Operations Research"},{"issue":"7540","key":"9455_CR20","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., et al. (2015). Human-level control through deep reinforcement learning. Nature, 518(7540), 529\u2013533.","journal-title":"Nature"},{"key":"9455_CR21","unstructured":"Konda, V. R., & Tsitsiklis, J. N. (2000). Actor-critic algorithms. In Advances in neural information processing systems (pp. 1008\u20131014)."},{"issue":"4","key":"9455_CR22","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"VR Konda","year":"2003","unstructured":"Konda, V. R., & Tsitsiklis, J. N. (2003). On actor-critic algorithms. SIAM Journal on Control and Optimization, 42(4), 1143\u20131166.","journal-title":"SIAM Journal on Control and Optimization"},{"issue":"6","key":"9455_CR23","doi-asserted-by":"publisher","first-page":"1291","DOI":"10.1109\/TSMCC.2012.2218595","volume":"42","author":"I Grondman","year":"2012","unstructured":"Grondman, I., Busoniu, L., Lopes, G. A., & Babuska, R. (2012). A survey of actor-critic reinforcement learning: Standard and natural policy gradients. IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews), 42(6), 1291\u20131307.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)"},{"key":"9455_CR24","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., & Riedmiller, M. (2014). Deterministic policy gradient algorithms. In ICML."},{"key":"9455_CR25","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2015). Continuous control with deep reinforcement learning. arXiv preprint arXiv:150902971."},{"key":"9455_CR26","unstructured":"Mnih, V., Heess, N., Graves, A., et al. (2014). Recurrent models of visual attention. In Advances in neural information processing systems (pp. 2204\u20132212)."},{"key":"9455_CR27","unstructured":"Cho, K., Van Merri\u00ebnboer, B., Gulcehre, C., Bahdanau, D., Bougares, F., Schwenk, H., & Bengio, Y. (2014). Learning phrase representations using rnn encoder-decoder for statistical machine translation. arXiv preprint arXiv:14061078."},{"key":"9455_CR28","unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A., Salakhudinov, R., Zemel, R., & Bengio, Y. (2015). Show, attend and tell: Neural image caption generation with visual attention. In International conference on machine learning (pp. 2048\u20132057)."},{"key":"9455_CR29","unstructured":"Luong, M. T., Pham, H., & Manning, C. D. (2015). Effective approaches to attention-based neural machine translation. arXiv preprint arXiv:150804025."},{"key":"9455_CR30","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., & Polosukhin, I. (2017). Attention is all you need. In Advances in neural information processing systems (pp. 5998\u20136008)."},{"key":"9455_CR31","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1613\/jair.1024","volume":"16","author":"DV Pynadath","year":"2002","unstructured":"Pynadath, D. V., & Tambe, M. (2002). The communicative multiagent team decision problem: Analyzing teamwork theories and models. Journal of Artificial Intelligence Research, 16, 389\u2013423.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9455_CR32","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1613\/jair.1427","volume":"22","author":"CV Goldman","year":"2004","unstructured":"Goldman, C. V., & Zilberstein, S. (2004). Decentralized control of cooperative systems: Categorization and complexity analysis. Journal of Artificial Intelligence Research, 22, 143\u2013174.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9455_CR33","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, O. P., & Mordatch, I. (2017). Multi-agent actor-critic for mixed cooperative-competitive environments. In Advances in neural information processing systems (pp. 6379\u20136390)."},{"key":"9455_CR34","unstructured":"Chu, X., & Ye, H. (2017). Parameter sharing deep deterministic policy gradient for cooperative multi-agent reinforcement learning. arXiv preprint arXiv:171000336."},{"key":"9455_CR35","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., & Whiteson, S. (2017). Counterfactual multi-agent policy gradients. arXiv preprint arXiv:170508926."},{"key":"9455_CR36","doi-asserted-by":"crossref","unstructured":"Peng, Z., Zhang, L., & Luo, T. (2018). Learning to communicate via supervised attentional message processing. In Proceedings of the 31st international conference on computer animation and social agents, ACM (pp. 11\u201316).","DOI":"10.1145\/3205326.3205346"},{"key":"9455_CR37","unstructured":"Jiang, J., & Lu, Z. (2018). Learning attentional communication for multi-agent cooperation. arXiv preprint arXiv:180507733."},{"key":"9455_CR38","unstructured":"Yang, Y., Luo, R., Li, M., Zhou, M., Zhang, W., & Wang, J. (2018). Mean field multi-agent reinforcement learning. arXiv preprint arXiv:180205438."},{"key":"9455_CR39","unstructured":"Sunehag, P., Lever, G., Gruslys, A., Czarnecki, W. M., Zambaldi, V., Jaderberg, M., Lanctot, M., Sonnerat, N., Leibo, J. Z., Tuyls, K. et al. (2017). Value-decomposition networks for cooperative multi-agent learning. arXiv preprint arXiv:170605296."},{"key":"9455_CR40","unstructured":"Rashid, T., Samvelyan, M., de Witt, C. S., Farquhar, G., Foerster, J., Whiteson, S. (2018). Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning. arXiv preprint arXiv:180311485."},{"key":"9455_CR41","unstructured":"Lazaridou, A., Peysakhovich, A., & Baroni, M. (2016). Multi-agent cooperation and the emergence of (natural) language. arXiv preprint arXiv:161207182."},{"key":"9455_CR42","unstructured":"Mordatch, I., & Abbeel, P. (2017). Emergence of grounded compositional language in multi-agent populations. arXiv preprint arXiv:170304908."},{"key":"9455_CR43","unstructured":"Das, A., Kottur, S., Moura, J. M., Lee, S., & Batra, D. (2017). Learning cooperative visual dialog agents with deep reinforcement learning. arXiv preprint arXiv:170306585."},{"key":"9455_CR44","unstructured":"Havrylov, S., & Titov, I. (2017). Emergence of language with multi-agent games: Learning to communicate with sequences of symbols. arXiv preprint arXiv:170511192."},{"key":"9455_CR45","unstructured":"Hernandez-Leal, P., Kaisers, M., Baarslag, T., & de Cote, E. M. (2017). A survey of learning in multiagent environments: Dealing with non-stationarity. arXiv preprint arXiv:170709183."},{"key":"9455_CR46","unstructured":"Sorokin, I., Seleznev, A., Pavlov, M., Fedorov, A., & Ignateva, A. (2015). Deep attention recurrent q-network. arXiv preprint arXiv:151201693."},{"key":"9455_CR47","unstructured":"Oh, J., Chockalingam, V., Singh, S., & Lee, H. (2016). Control of memory, active perception, and action in minecraft. In Proceedings of The 33rd international conference on machine learning, PMLR, New York, New York, USA, Proceedings of machine learning research (pp. 2790\u20132799)."},{"key":"9455_CR48","unstructured":"Omidshafiei, S., Kim, D. K., Pazis, J., & How, J. P. (2017). Crossmodal attentive skill learner. arXiv preprint arXiv:171110314."},{"key":"9455_CR49","unstructured":"Choi, J., Lee, B. J., & Zhang, B. T. (2017). Multi-focus attention network for efficient deep reinforcement learning. In Workshops at the thirty-first AAAI conference on artificial intelligence."},{"issue":"3","key":"9455_CR50","doi-asserted-by":"publisher","first-page":"294","DOI":"10.3390\/e21030294","volume":"21","author":"M Geng","year":"2019","unstructured":"Geng, M., Xu, K., Zhou, X., Ding, B., Wang, H., & Zhang, L. (2019). Learning to cooperate via an attention-based communication neural network in decentralized multi-robot exploration. Entropy, 21(3), 294.","journal-title":"Entropy"},{"key":"9455_CR51","unstructured":"Iqbal, S., & Sha, F. (2018). Actor-attention-critic for multi-agent reinforcement learning. arXiv preprint arXiv:181002912."},{"issue":"4","key":"9455_CR52","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/BF02551274","volume":"2","author":"G Cybenko","year":"1989","unstructured":"Cybenko, G. (1989). Approximation by superpositions of a sigmoidal function. Mathematics of Control, Signals and Systems, 2(4), 303\u2013314.","journal-title":"Mathematics of Control, Signals and Systems"},{"issue":"5","key":"9455_CR53","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","volume":"2","author":"K Hornik","year":"1989","unstructured":"Hornik, K., Stinchcombe, M., & White, H. (1989). Multilayer feedforward networks are universal approximators. Neural Networks, 2(5), 359\u2013366.","journal-title":"Neural Networks"},{"key":"9455_CR54","unstructured":"Schaul, T., Horgan, D., Gregor, K., & Silver, D. (2015). Universal value function approximators. In International conference on machine learning (pp. 1312\u20131320)."},{"key":"9455_CR55","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.artint.2018.01.002","volume":"258","author":"SV Albrecht","year":"2018","unstructured":"Albrecht, S. V., & Stone, P. (2018). Autonomous agents modelling other agents: A comprehensive survey and open problems. Artificial Intelligence, 258, 66\u201395.","journal-title":"Artificial Intelligence"},{"key":"9455_CR56","unstructured":"He, H., Boyd-Graber, J., Kwok, K., & Daum\u00e9 III, H. (2016). Opponent modeling in deep reinforcement learning. In International conference on machine learning (pp. 1804\u20131813)."},{"key":"9455_CR57","unstructured":"Wang, Z., Schaul, T., Hessel, M., Van Hasselt, H., Lanctot, M., & De Freitas, N. (2016). Dueling network architectures for deep reinforcement learning. In: Proceedings of the 33nd international conference on machine learning, ICML 2016 (pp. 1995\u20132003)."},{"key":"9455_CR58","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1145\/1090191.1080122","volume":"35","author":"S Kandula","year":"2005","unstructured":"Kandula, S., Katabi, D., Davie, B., & Charny, A. (2005). Walking the tightrope: Responsive yet stable traffic engineering. ACM SIGCOMM Computer Communication Review, 35, 253\u2013264.","journal-title":"ACM SIGCOMM Computer Communication Review"},{"key":"9455_CR59","unstructured":"Mataric, M. J. (1994). Reward functions for accelerated learning. In Machine learning proceedings 1994 (pp. 181\u2013189). New York: Elsevier."},{"key":"9455_CR60","unstructured":"Ha, D., & Schmidhuber, J. (2018). World models. arXiv preprint arXiv:180310122."},{"key":"9455_CR61","unstructured":"Chockalingam, V., Sung, T. T. K., Behbahani, F., Gargeya, R., Sivanantham, A., & Malysheva, A. (2018). Extending world models for multi-agent reinforcement learning in malm\u00f6. In Joint Proceedings of the AIIDE 2018 Workshops co-located with 14th AAAI conference on artificial intelligence and interactive digital entertainment (AIIDE 2018). http:\/\/ceur-ws.org\/Vol-2282\/MARLO_110.pdf."},{"key":"9455_CR62","unstructured":"Andreas, J., Dragan, A., & Klein, D. (2017). Translating neuralese. arXiv preprint arXiv:170406960."},{"key":"9455_CR63","unstructured":"Velickovic, P., Cucurull, G., Casanova, A., Romero, A., Lio, P., & Bengio, Y. (2017). Graph attention networks. arXiv preprint arXiv:171010903."},{"key":"9455_CR64","unstructured":"Lee, J. B., Rossi, R. A., Kim, S., Ahmed, N. K., & Koh, E. (2018). Attention models in graphs: A survey. arXiv preprint arXiv:180707984."},{"key":"9455_CR65","unstructured":"Wang, T., Liao, R., Ba, J., & Fidler, S. (2018). Nervenet: Learning structured policy with graph neural networks. In International conference on learning representations. https:\/\/openreview.net\/forum?id=S1sqHMZCb."},{"key":"9455_CR66","unstructured":"Jiang, J., Dun, C., & Lu, Z. (2018). Graph convolutional reinforcement learning for multi-agent cooperation. arXiv preprint arXiv:181009202."}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-020-09455-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10458-020-09455-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-020-09455-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,19]],"date-time":"2022-10-19T18:33:53Z","timestamp":1666204433000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10458-020-09455-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,25]]},"references-count":66,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2020,4]]}},"alternative-id":["9455"],"URL":"https:\/\/doi.org\/10.1007\/s10458-020-09455-w","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,3,25]]},"assertion":[{"value":"25 March 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"32"}}