{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T23:37:50Z","timestamp":1773099470252,"version":"3.50.1"},"reference-count":54,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2021ZD0112500"],"award-info":[{"award-number":["2021ZD0112500"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U22A2098"],"award-info":[{"award-number":["U22A2098"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U2341229"],"award-info":[{"award-number":["U2341229"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172185"],"award-info":[{"award-number":["62172185"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976102"],"award-info":[{"award-number":["61976102"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62206105"],"award-info":[{"award-number":["62206105"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62202200"],"award-info":[{"award-number":["62202200"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Research and Development Project of Jilin Province","award":["20240304200SF"],"award-info":[{"award-number":["20240304200SF"]}]},{"name":"International Cooperation Project of Jilin Province","award":["20220402009GH"],"award-info":[{"award-number":["20220402009GH"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. 
Learning Syst."],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1109\/tnnls.2024.3437366","type":"journal-article","created":{"date-parts":[[2024,8,14]],"date-time":"2024-08-14T13:39:12Z","timestamp":1723642752000},"page":"9136-9149","source":"Crossref","is-referenced-by-count":1,"title":["Boosting Weak-to-Strong Agents in Multiagent Reinforcement Learning via Balanced PPO"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5387-7904","authenticated-orcid":false,"given":"Sili","family":"Huang","sequence":"first","affiliation":[{"name":"Key Laboratory of Symbolic Computation and Knowledge Engineer of Ministry of Education and the School of Artificial Intelligence, Jilin University, Changchun, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7835-9556","authenticated-orcid":false,"given":"Hechang","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Jilin University, Changchun, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8519-4750","authenticated-orcid":false,"given":"Haiyin","family":"Piao","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Jilin University, Changchun, China"}]},{"given":"Zhixiao","family":"Sun","sequence":"additional","affiliation":[{"name":"Unmanned System Research Institute, Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2697-8093","authenticated-orcid":false,"given":"Yi","family":"Chang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Jilin University, Changchun, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1539-7939","authenticated-orcid":false,"given":"Lichao","family":"Sun","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Lehigh University, Bethlehem, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1927-8419","authenticated-orcid":false,"given":"Bo","family":"Yang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Symbolic Computation and Knowledge Engineer of Ministry of Education and the School of Computer Science and Technology, Jilin University, Changchun, China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Lowe"},{"key":"ref2","article-title":"DOP: Off-policy multi-agent decomposed policy gradients","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3042508"},{"key":"ref4","first-page":"39363","article-title":"Learning generalizable agents via saliency-guided features decorrelation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Huang"},{"key":"ref5","first-page":"12619","article-title":"Distributional reward estimation for effective multi-agent deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Hu"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-55393-7_1"},{"key":"ref7","article-title":"In-context decision transformer: Reinforcement learning via hierarchical chain-of-thought","volume-title":"Proc. 41st Int. Conf. Mach. 
Learn.","author":"Huang"},{"key":"ref8","article-title":"Instructed diffuser with temporal condition guidance for offline reinforcement learning","author":"Hu","year":"2023","journal-title":"arXiv:2306.04875"},{"key":"ref9","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Yu"},{"key":"ref10","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","volume-title":"Proc. 17th Int. Conf. Auto. Agents MultiAgent Syst. (AAMAS)","author":"Sunehag"},{"key":"ref11","first-page":"1","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"21","author":"Rashid"},{"key":"ref12","article-title":"RODE: Learning roles to decompose multi-agent tasks","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref13","first-page":"3040","article-title":"Social influence as intrinsic motivation for multi-agent deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jaques"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref15","article-title":"The StarCraft multi-agent challenge","author":"Samvelyan","year":"2019","journal-title":"arXiv:1902.04043"},{"key":"ref16","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref17","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018","journal-title":"arXiv:1801.01290"},{"key":"ref18","first-page":"11853","article-title":"Learning implicit credit assignment for multi-agent actor-critic","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","volume":"33","author":"Zhou"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3230701"},{"key":"ref21","article-title":"R-MADDPG for partially observable environments and limited communication","author":"Wang","year":"2020","journal-title":"arXiv:2002.06684"},{"key":"ref22","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","author":"Wang","year":"2020","journal-title":"arXiv:2008.01062"},{"key":"ref23","first-page":"122","article-title":"Learning with opponent-learning awareness","volume-title":"Proc. 17th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Foerster"},{"key":"ref24","first-page":"789","article-title":"Gifting in multi-agent reinforcement learning","volume-title":"Proc. 19th Int. Conf. Auto. Agents Multiagent Syst.","author":"Lupu"},{"key":"ref25","first-page":"1698","article-title":"LDSA: Learning dynamic subtask assignment in cooperative multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"35","author":"Yang"},{"key":"ref26","article-title":"ROMA: Multi-agent reinforcement learning with emergent roles","author":"Wang","year":"2020","journal-title":"arXiv:2003.08039"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-10045-9"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3213566"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3098985"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3146858"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3172572"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3220798"},{"key":"ref33","first-page":"964","article-title":"Multi-agent graph-attention communication and teaming","volume-title":"Proc. 20th Int. Conf. Auto. Agents MultiAgent Syst.","author":"Niu"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3139138"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3215774"},{"key":"ref36","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son"},{"key":"ref37","article-title":"Learning nearly decomposable value functions via communication minimization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3089493"},{"key":"ref39","first-page":"23417","article-title":"Individual reward assisted multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2996209"},{"key":"ref41","first-page":"3991","article-title":"Celebrating diversity in shared multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Li"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3146201"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3236361"},{"key":"ref44","article-title":"Influence-based multi-agent exploration","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Wang"},{"key":"ref45","first-page":"2324","article-title":"A Q-values sharing framework for multiple independent q-learners","volume-title":"Proc. 18th Int. Conf. Auto. Agents MultiAgent Syst.","author":"Zhu"},{"key":"ref46","first-page":"151","article-title":"Understanding the impact of entropy on policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn. (PMLR)","author":"Ahmed"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref48","article-title":"Is independent learning all you need in the StarCraft multi-agent challenge?","author":"de Witt","year":"2020","journal-title":"arXiv:2011.09533"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref50","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2015","journal-title":"arXiv:1506.02438"},{"key":"ref51","first-page":"12208","article-title":"FACMAC: Factored multi-agent centralised policy gradients","volume":"34","author":"Peng","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref52","first-page":"30776","article-title":"Complementary attention for multi-agent reinforcement learning","volume-title":"Proc. Int. 
Conf. Mach. Learn.","author":"Shao"},{"key":"ref53","article-title":"Prioritized experience replay","author":"Schaul","year":"2015","journal-title":"arXiv:1511.05952"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/10982361\/10636273.pdf?arnumber=10636273","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:39:44Z","timestamp":1764959984000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10636273\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5]]},"references-count":54,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2024.3437366","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5]]}}}
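
The object above is a standard Crossref REST API "work" message. As a minimal sketch of how such a record can be fetched and picked apart, assuming only the public api.crossref.org endpoint and the third-party requests package (the DOI is taken from the record itself; the mailto address is a placeholder for Crossref's "polite pool" convention):

# Minimal sketch: fetch the Crossref work record above and extract a few fields.
# Assumes the public Crossref REST API (api.crossref.org) and the third-party
# `requests` package; error handling beyond raise_for_status() is elided.
import requests

DOI = "10.1109/tnnls.2024.3437366"  # from the record above

resp = requests.get(
    f"https://api.crossref.org/works/{DOI}",
    params={"mailto": "you@example.com"},  # placeholder contact for the polite pool
    timeout=30,
)
resp.raise_for_status()
work = resp.json()["message"]  # same shape as the "message" object above

title = work["title"][0]
journal = work["container-title"][0]
authors = [f'{a.get("given", "")} {a.get("family", "")}'.strip() for a in work["author"]]
# Not every reference entry carries a DOI; some are unstructured citation strings.
ref_dois = [r["DOI"] for r in work.get("reference", []) if "DOI" in r]

print(title)
print(f'{journal}, vol. {work.get("volume")}, no. {work.get("issue")}, pp. {work.get("page")}')
print(", ".join(authors))
print(f'{work["references-count"]} references, {len(ref_dois)} resolvable by DOI')

Run against the DOI in this record, a script along these lines would print the article title, the journal/volume/issue/page fields, the seven authors, and how many of the 54 references carry resolvable DOIs.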