{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T16:07:44Z","timestamp":1770739664691,"version":"3.49.0"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62101029"],"award-info":[{"award-number":["62101029"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2023A1515140071"],"award-info":[{"award-number":["2023A1515140071"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"name":"China Scholarship Council Award","award":["202006465043"],"award-info":[{"award-number":["202006465043"]}]},{"name":"China Scholarship Council Award","award":["202306460078"],"award-info":[{"award-number":["202306460078"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. 
Intell."],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1109\/tpami.2025.3634378","type":"journal-article","created":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T18:45:41Z","timestamp":1763577941000},"page":"3417-3431","source":"Crossref","is-referenced-by-count":0,"title":["Dynamic Deep Factor Graph for Multi-Agent Reinforcement Learning"],"prefix":"10.1109","volume":"48","author":[{"given":"Yuchen","family":"Shi","sequence":"first","affiliation":[{"name":"School of Computer and Communication Engineering, Shunde Innovation School, University of Science and Technology Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8162-4269","authenticated-orcid":false,"given":"Shihong","family":"Duan","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, Shunde Innovation School, University of Science and Technology Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1624-5494","authenticated-orcid":false,"given":"Cheng","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, Shunde Innovation School, University of Science and Technology Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9530-8838","authenticated-orcid":false,"given":"Ran","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, Shunde Innovation School, University of Science and Technology Beijing, Beijing, China"}]},{"given":"Fangwen","family":"Ye","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, Shunde Innovation School, University of Science and Technology Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9307-2120","authenticated-orcid":false,"given":"Chau","family":"Yuen","sequence":"additional","affiliation":[{"name":"School of Electrical and Electronic Engineering, Nanyang Technological University, 
Singapore"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00610-y"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-024-54938-5"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-022-04105-y"},{"key":"ref4","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lowe"},{"key":"ref5","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref6","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref7","article-title":"Off-policy multi-agent decomposed policy gradients","author":"Wang","year":"2020","journal-title":"arXiv:2007.12322"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.65109\/JSRC7365"},{"issue":"1","key":"ref9","first-page":"7234","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2006.880330"},{"key":"ref11","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son"},{"key":"ref12","first-page":"11609","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref13","first-page":"10199","article-title":"Weighted QMIX: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Rashid"},{"key":"ref14","first-page":"227","article-title":"Coordinated reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Guestrin"},{"key":"ref15","first-page":"980","article-title":"Deep coordination graphs","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"B\u00f6hmer"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-022-09580-8"},{"key":"ref17","first-page":"25655","article-title":"Non-linear coordination graphs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kang"},{"key":"ref18","first-page":"24963","article-title":"Self-organized polynomial-time coordination graphs","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yang"},{"key":"ref19","first-page":"8126","article-title":"Context-aware sparse deep coordination graphs","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.65109\/czoy2835"},{"key":"ref21","article-title":"Graph convolutional reinforcement learning","author":"Jiang","year":"2018","journal-title":"arXiv:1810.09202"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2004.1267047"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/18.910572"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414606"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3356464.3357707"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref29","first-page":"29","article-title":"Deep recurrent Q-learning for partially observable MDPs","volume-title":"Proc. AAAI Fall Symp. 
Ser.","author":"Hausknecht"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2014.7004682"},{"key":"ref31","first-page":"36748","article-title":"Scalable interpretability via polynomials","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Dubey"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1201\/9780203738535-7"},{"key":"ref33","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Achiam"},{"key":"ref34","first-page":"11909","article-title":"Generalized proximal policy optimization with sample reuse","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Queeney"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/11780519_1"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/11744047_21"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2010.5502031"},{"key":"ref38","first-page":"1146","article-title":"Stabilising experience replay for deep multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Foerster"},{"key":"ref39","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2015","journal-title":"arXiv:1506.02438"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.65109\/LVZZ5205"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/11372200\/11259072.pdf?arnumber=11259072","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T21:05:34Z","timestamp":1770671134000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11259072\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":40,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3634378","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3]]}}}