{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T11:28:46Z","timestamp":1764588526173,"version":"3.37.3"},"reference-count":75,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tpami.2024.3399936","type":"journal-article","created":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T17:27:50Z","timestamp":1715621270000},"page":"8157-8172","source":"Crossref","is-referenced-by-count":10,"title":["Interaction Pattern Disentangling for Multi-Agent Reinforcement Learning"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0584-9129","authenticated-orcid":false,"given":"Shunyu","family":"Liu","sequence":"first","affiliation":[{"name":"State Key Laboratory of Blockchain and Security, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3671-6521","authenticated-orcid":false,"given":"Jie","family":"Song","sequence":"additional","affiliation":[{"name":"School of Software Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2595-6743","authenticated-orcid":false,"given":"Yihe","family":"Zhou","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Blockchain and Security, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5499-5388","authenticated-orcid":false,"given":"Na","family":"Yu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Blockchain and Security, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2492-5230","authenticated-orcid":false,"given":"Kaixuan","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Blockchain and Security, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8640-8434","authenticated-orcid":false,"given":"Zunlei","family":"Feng","sequence":"additional","affiliation":[{"name":"School of Software Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2621-6048","authenticated-orcid":false,"given":"Mingli","family":"Song","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Blockchain and Security, Zhejiang University, Hangzhou, 
China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3102140"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3103132"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3190471"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2901791"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.2997896"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2020.2999890"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2020.2971427"},{"key":"ref13","first-page":"3271","article-title":"Multi-agent reinforcement learning for active voltage control on power distribution networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-4095-0_11"},{"key":"ref15","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","volume-title":"Proc. 17th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Sunehag"},{"key":"ref16","first-page":"4295","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rashid"},{"key":"ref17","first-page":"1","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref18","first-page":"4596","article-title":"Randomized entity-wise factorization for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Iqbal"},{"key":"ref19","first-page":"2961","article-title":"Actor-attention-critic for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Iqbal"},{"key":"ref20","first-page":"7265","article-title":"Learning attentional communication for multi-agent cooperation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Jiang"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i10.26370"},{"key":"ref22","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son"},{"key":"ref23","first-page":"10199","article-title":"Weighted QMIX: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Rashid"},{"key":"ref24","first-page":"9876","article-title":"ROMA: Multi-agent reinforcement learning with emergent roles","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref25","first-page":"1","article-title":"RODE: Learning roles to decompose multi-agent tasks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref26","first-page":"1","article-title":"Evolutionary population curriculum for scaling multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Long"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6221"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i9.21165"},{"key":"ref29","first-page":"1","article-title":"MAVEN: Multi-agent variational exploration","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Mahajan"},{"key":"ref30","first-page":"1","article-title":"Influence-based multi-agent exploration","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref31","first-page":"4992","article-title":"The emergence of individuality","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jiang"},{"key":"ref32","first-page":"3991","article-title":"Celebrating diversity in shared multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref33","first-page":"3757","article-title":"Episodic ulti-agent reinforcement learning with curiosity-driven exploration","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zheng"},{"key":"ref34","first-page":"1","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lowe"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref36","first-page":"11853","article-title":"Learning implicit credit assignment for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zhou"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i13.17353"},{"key":"ref38","first-page":"1","article-title":"DOP: Off-policy multi-agent decomposed policy gradients","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref39","first-page":"12208","article-title":"FACMAC: Factored multi-agent centralised policy gradients","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Peng"},{"key":"ref40","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref41","first-page":"1","article-title":"Trust region policy optimisation in multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kuba"},{"article-title":"Benchmarking multiagent deep reinforcement learning algorithms in cooperative tasks","year":"2020","author":"Papoudakis","key":"ref42"},{"key":"ref43","first-page":"6863","article-title":"Revisiting some common practices in cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fu"},{"key":"ref44","first-page":"26437","article-title":"Coordinated proximal policy optimization","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wu"},{"key":"ref45","first-page":"13458","article-title":"Settling the variance of multi-agent policy gradients","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Kuba"},{"key":"ref46","first-page":"13066","article-title":"Difference advantage estimation for multi-agent policy gradients","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Li"},{"article-title":"Model-based multi-agent reinforcement learning: Recent progress and prospects","year":"2022","author":"Wang","key":"ref47"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/466"},{"key":"ref49","first-page":"19580","article-title":"Efficient model-based multi-agent reinforcement learning via optimistic equilibrium computation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sessa"},{"key":"ref50","first-page":"1166","article-title":"Model-based multi-agent RL in zero-sum Markov games with near-optimal sample complexity","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref51","first-page":"7001","article-title":"A sharp analysis of model-based reinforcement learning with self-play","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu"},{"key":"ref52","first-page":"7301","article-title":"Tesseract: Tensorised actors for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mahajan"},{"article-title":"Model based multi-agent reinforcement learning with tensor decompositions","year":"2021","author":"Van Der","key":"ref53"},{"key":"ref54","first-page":"1","article-title":"Graph convolutional reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Jiang"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6212"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6214"},{"key":"ref57","first-page":"1","article-title":"Updet: Universal multi-agent RL via policy decoupling with transformers","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hu"},{"article-title":"Cooperative multi-agent transfer learning with level-adaptive credit assignment","year":"2021","author":"Zhou","key":"ref58"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i13.17357"},{"key":"ref60","first-page":"1","article-title":"Context-aware sparse deep coordination graphs","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6211"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539481"},{"article-title":"Generalization in cooperative multi-agent systems","year":"2022","author":"Mahajan","key":"ref63"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref65","first-page":"1365","article-title":"Regularized softmax deep multi-agent Q-learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Pan"},{"key":"ref66","first-page":"12491","article-title":"FOP: Factorizing optimal joint policy of maximum-entropy multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang"},{"key":"ref67","first-page":"10706","article-title":"Q-value path decomposition for deep multiagent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yang"},{"key":"ref68","first-page":"1","article-title":"Action semantics network: Considering the effects of actions in multiagent systems","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref70","first-page":"1614","article-title":"From softmax to sparsemax: A sparse model of attention and multi-label classification","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Martins"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1223"},{"key":"ref72","first-page":"9929","article-title":"Understanding contrastive representation learning through alignment and uniformity on the hypersphere","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"ref74","first-page":"2186","article-title":"The Starcraft multi-agent challenge","volume-title":"Proc. 18th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Samvelyan"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5878"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10746266\/10529613.pdf?arnumber=10529613","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:16:57Z","timestamp":1732666617000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10529613\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":75,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2024.3399936","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}