{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T06:08:19Z","timestamp":1749794899834,"version":"3.37.3"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Autonomous Intelligent Unmanned Systems through NSFC","doi-asserted-by":"publisher","award":["62088101"],"award-info":[{"award-number":["62088101"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U21A20484"],"award-info":[{"award-number":["U21A20484"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1109\/tnnls.2023.3262921","type":"journal-article","created":{"date-parts":[[2023,4,18]],"date-time":"2023-04-18T17:38:13Z","timestamp":1681839493000},"page":"11553-11564","source":"Crossref","is-referenced-by-count":5,"title":["Learning Multi-Agent Cooperation via Considering Actions of Teammates"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0583-2423","authenticated-orcid":false,"given":"Shanqi","family":"Liu","sequence":"first","affiliation":[{"name":"State Key Laboratory of Industrial Control Technology, Institute of Cyber-Systems and Control, Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2496-7748","authenticated-orcid":false,"given":"Weiwei","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Industrial Control Technology, Institute of Cyber-Systems and Control, Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1443-1077","authenticated-orcid":false,"given":"Wenzhou","family":"Chen","sequence":"additional","affiliation":[{"name":"College of Computer Science, Hangzhou Dianzi University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7292-4056","authenticated-orcid":false,"given":"Guanzhong","family":"Tian","sequence":"additional","affiliation":[{"name":"Ningbo Innovation Center, Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6568-8801","authenticated-orcid":false,"given":"Jun","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Industrial Control Technology, Institute of Cyber-Systems and Control, Zhejiang University, Hangzhou, Zhejiang, China"}]},{"given":"Yao","family":"Tong","sequence":"additional","affiliation":[{"name":"Northwest Institute of Mechanical and Electrical Engineering and the Northwest 
Electromechanical Engineering Research Institute, Xianyang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0484-8167","authenticated-orcid":false,"given":"Junjie","family":"Cao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Industrial Control Technology, Institute of Cyber-Systems and Control, Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4822-8939","authenticated-orcid":false,"given":"Yong","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Industrial Control Technology, Institute of Cyber-Systems and Control, Zhejiang University, Hangzhou, Zhejiang, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2012.2219061"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2013.2293507"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2897847"},{"key":"ref4","article-title":"Guided deep reinforcement learning for swarm systems","author":"H\u00fcttenrauch","year":"2017","journal-title":"arXiv:1709.06011"},{"key":"ref5","article-title":"Graph convolutional reinforcement learning","author":"Jiang","year":"2018","journal-title":"arXiv:1810.09202"},{"key":"ref6","first-page":"1","article-title":"ROMA: Multi-agent reinforcement learning with emergent roles","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref7","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"Sunehag","year":"2017","journal-title":"arXiv:1706.05296"},{"key":"ref8","first-page":"4295","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rashid"},{"key":"ref9","first-page":"10199","article-title":"Weighted QMIX: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Whiteson"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v24i1.7529"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/s41562-017-0064"},{"key":"ref12","article-title":"The StarCraft multi-agent challenge","author":"Samvelyan","year":"2019","journal-title":"arXiv:1902.04043"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11371"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref15","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"Lowe","year":"2017","journal-title":"arXiv:1706.02275"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref17","first-page":"2961","article-title":"Actor-attention-critic for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Iqbal"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3146858"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.01.031"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2006.880330"},{"key":"ref23","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","author":"Wang","year":"2020","journal-title":"arXiv:2008.01062"},{"key":"ref24","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son"},{"key":"ref25","first-page":"3930","article-title":"UneVEn: Universal value exploration for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gupta"},{"key":"ref26","article-title":"Greedy-based value representation for optimal coordination in multi-agent reinforcement learning","author":"Wan","year":"2021","journal-title":"arXiv:2112.04454"},{"key":"ref27","article-title":"MAVEN: Multi-agent variational exploration","author":"Mahajan","year":"2019","journal-title":"arXiv:1910.07483"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2996209"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3105869"},{"key":"ref30","article-title":"Distributed prioritized experience replay","author":"Horgan","year":"2018","journal-title":"arXiv:1803.00933"},{"key":"ref31","first-page":"567","article-title":"Empirical evaluation of ad hoc teamwork in the pursuit domain","volume-title":"Proc. AAMAS","author":"Barrett"},{"key":"ref32","first-page":"1085","article-title":"Cooperating with a Markovian ad hoc teammate","volume-title":"Proc. AAMAS","author":"Chakraborty"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i03.5636"},{"key":"ref34","article-title":"Multi-agent collaboration via reward attribution decomposition","author":"Zhang","year":"2020","journal-title":"arXiv:2010.08531"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.1050.0451"},{"key":"ref36","article-title":"Rethinking the implementation tricks and monotonicity constraint in cooperative multi-agent reinforcement learning","author":"Hu","year":"2021","journal-title":"arXiv:2102.03479"},{"key":"ref37","article-title":"RODE: Learning roles to decompose multi-agent tasks","author":"Wang","year":"2020","journal-title":"arXiv:2010.01523"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10623582\/10103926.pdf?arnumber=10103926","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,6]],"date-time":"2024-08-06T10:27:28Z","timestamp":1722940048000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10103926\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8]]},"references-count":37,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2023.3262921","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2024,8]]}}}