{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T22:19:03Z","timestamp":1767046743592,"version":"3.37.3"},"reference-count":44,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007085","name":"National University of Defense Technology","doi-asserted-by":"publisher","award":["62376280"],"award-info":[{"award-number":["62376280"]}],"id":[{"id":"10.13039\/501100007085","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2024,11]]},"DOI":"10.1016\/j.knosys.2024.112474","type":"journal-article","created":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T15:10:05Z","timestamp":1725635405000},"page":"112474","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["VAOS: Enhancing the stability of cooperative multi-agent policy learning"],"prefix":"10.1016","volume":"304","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1849-8487","authenticated-orcid":false,"given":"Peng","family":"Li","sequence":"first","affiliation":[]},{"given":"Shaofei","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Weilin","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Zhenzhen","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Chen","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2024.112474_b1","unstructured":"L. Zheng, J. Chen, J. Wang, J. He, Y. Hu, Y. Chen, C. Fan, Y. Gao, C. Zhang, Episodic multi-agent reinforcement learning with curiosity-driven exploration, in: Proc. Adv. Neural Inf. Process. Syst., Vol. 34, 2021, pp. 3757\u20133769."},{"issue":"1","key":"10.1016\/j.knosys.2024.112474_b2","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1109\/TCIAIG.2017.2679115","article-title":"Multiagent inverse reinforcement learning for two-person zero-sum games","volume":"10","author":"Lin","year":"2018","journal-title":"IEEE Transac. Games"},{"key":"10.1016\/j.knosys.2024.112474_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.111719","article-title":"QDAP: Downsizing adaptive policy for cooperative multi-agent reinforcement learning","volume":"294","author":"Zhao","year":"2024","journal-title":"Knowl.-Based Syst."},{"issue":"9","key":"10.1016\/j.knosys.2024.112474_b4","doi-asserted-by":"crossref","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","article-title":"Deep reinforcement learning for multiagent systems: a review of challenges, solutions, and applications","volume":"50","author":"Nguyen","year":"2020","journal-title":"IEEE Trans. Cybern."},{"issue":"1","key":"10.1016\/j.knosys.2024.112474_b5","doi-asserted-by":"crossref","first-page":"262","DOI":"10.1109\/TCYB.2021.3087228","article-title":"Network-scale traffic signal control via multiagent reinforcement learning with deep spatiotemporal attentive network","volume":"53","author":"Huang","year":"2023","journal-title":"IEEE Trans. Cybern."},{"issue":"7","key":"10.1016\/j.knosys.2024.112474_b6","doi-asserted-by":"crossref","first-page":"9335","DOI":"10.1109\/TITS.2021.3105426","article-title":"An information fusion approach to intelligent traffic signal control using the joint methods of multiagent reinforcement learning and artificial intelligence of things","volume":"23","author":"Yang","year":"2022","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"2","key":"10.1016\/j.knosys.2024.112474_b7","doi-asserted-by":"crossref","first-page":"1725","DOI":"10.1109\/TII.2022.3182328","article-title":"A multiagent reinforcement learning approach for wind farm frequency control","volume":"19","author":"Liang","year":"2023","journal-title":"IEEE Trans. Industr. Inform."},{"key":"10.1016\/j.knosys.2024.112474_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.112000","article-title":"An evolutionary multi-agent reinforcement learning algorithm for multi-UAV air combat","volume":"299","author":"Wang","year":"2024","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.knosys.2024.112474_b9","unstructured":"P. Sunehag, G. Lever, A. Gruslys, W.M. Czarnecki, V. Zambaldi, M. Jaderberg, M. Lanctot, N. Sonnerat, J.Z. Leibo, K. Tuyls, T. Graepel, Value-decomposition networks for cooperative multi-agent learning based on team reward, in: Proc. 17th Int. Conf. Auto. Agents Multiagent Syst., 2018, pp. 2085\u20132087."},{"key":"10.1016\/j.knosys.2024.112474_b10","unstructured":"T. Rashid, M. Samvelyan, C. Schroeder, G. Farquhar, J. Foerster, S. Whiteson, QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning, in: Proc. 35th Int. Conf. Mach. Learn. Vol. 80, 2018, pp. 4295\u20134304."},{"key":"10.1016\/j.knosys.2024.112474_b11","article-title":"MARLlib: A scalable and efficient multi-agent reinforcement learning library","author":"Hu","year":"2023","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"10.1016\/j.knosys.2024.112474_b12","doi-asserted-by":"crossref","first-page":"895","DOI":"10.1007\/s10462-021-09996-w","article-title":"Multi-agent deep reinforcement learning: a survey","volume":"55","author":"Gronauer","year":"2022","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.knosys.2024.112474_b13","doi-asserted-by":"crossref","unstructured":"J. Su, A. Stephen, B. Petrt, Value-decomposition multi-agent actor-critics, in: Proc. 21st AAAI Conf. Artif. Intell. Vol. 35, No. 13, 2021, pp. 11352\u201311360.","DOI":"10.1609\/aaai.v35i13.17353"},{"issue":"5","key":"10.1016\/j.knosys.2024.112474_b14","doi-asserted-by":"crossref","first-page":"2054","DOI":"10.1109\/TNNLS.2020.2996209","article-title":"Reinforcement learning with task decomposition for cooperative multiagent systems","volume":"32","author":"Sun","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2024.112474_b15","unstructured":"S. Liu, Y. Hu, R. Wu, D. Xing, Y. Xiong, C. Fan, K. Kuang, Y. Liu, Adaptive Value Decomposition with Greedy Marginal Contribution Computation for Cooperative Multi-Agent Reinforcement Learning, in: Proc. 21st Int. Conf. Auto. Agents Multiagent Syst., 2023, pp. 31\u201339."},{"key":"10.1016\/j.knosys.2024.112474_b16","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.110709","article-title":"Regularization-adapted Anderson acceleration for multi-agent reinforcement learning","volume":"275","author":"Wang","year":"2023","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.knosys.2024.112474_b17","unstructured":"L. Pan, T. Rashid, B. Peng, L. Huang, S. Whiteson, Regularized softmax deep multi-agent Q-learning, in: Proc. Adv. Neural Inf. Process. Syst. Vol. 34, 2021, pp. 1365\u20131377."},{"key":"10.1016\/j.knosys.2024.112474_b18","series-title":"Proceedings of 4th Connectionist Models Summer School","article-title":"Issues in using function approximation for reinforcement learning","author":"Thrun","year":"1993"},{"key":"10.1016\/j.knosys.2024.112474_b19","unstructured":"O. Anschel, N. Baram, N. Shimkin, Averaged-DQN: variance reduction and stabilization for deep reinforcement learning, in: Proc. 34th Int. Conf. Mach. Learn. Vol. 70, 2017, pp. 176\u2013185."},{"key":"10.1016\/j.knosys.2024.112474_b20","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1016\/j.neucom.2020.05.097","article-title":"A TD3-based multi-agent deep reinforcement learning method in mixed cooperation-competition environment","volume":"411","author":"Zhang","year":"2020","journal-title":"Neurocomputing"},{"year":"2019","series-title":"Reducing overestimation bias in multi-agent domains using double centralized critics","author":"Ackermann","key":"10.1016\/j.knosys.2024.112474_b21"},{"key":"10.1016\/j.knosys.2024.112474_b22","doi-asserted-by":"crossref","first-page":"94","DOI":"10.1016\/j.neucom.2021.12.039","article-title":"Sub-AVG: Overestimation reduction for cooperative multi-agent reinforcement learning","volume":"474","author":"Wu","year":"2022","journal-title":"Neurocomputing"},{"year":"1989","series-title":"Learning from delayed rewards","author":"Watkins","key":"10.1016\/j.knosys.2024.112474_b23"},{"key":"10.1016\/j.knosys.2024.112474_b24","first-page":"1","article-title":"UNMAS: Multiagent reinforcement learning for unshaped cooperative scenarios","author":"Chai","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2024.112474_b25","doi-asserted-by":"crossref","unstructured":"S. Kim, K. Asadi, M. Littman, G. Konidaris, DeepMellow: removing the need for a target network in deep Q-learning, in: Proc. 28th Int. Joint Conf. Artif. Intell., 2019, pp. 2733\u20132739.","DOI":"10.24963\/ijcai.2019\/379"},{"key":"10.1016\/j.knosys.2024.112474_b26","doi-asserted-by":"crossref","unstructured":"Y. Gan, Z. Zhang, x. Tan, Stabilizing Q learning via soft mellowmax operator, in: Proc. 21st AAAI Conf. Artif. Intell. Vol.35, No. 9, 2021, pp. 7501\u20137509.","DOI":"10.1609\/aaai.v35i9.16919"},{"key":"10.1016\/j.knosys.2024.112474_b27","doi-asserted-by":"crossref","unstructured":"Z. Li, X. Hou, Mixing update Q-value for deep reinforcement learning, in: Proc. Int. Joint Conf. Neural Netw., 2019, pp. 1\u20136.","DOI":"10.1109\/IJCNN.2019.8852397"},{"key":"10.1016\/j.knosys.2024.112474_b28","unstructured":"S. Fujimoto, D. Meger, D. Precup, Off-policy deep reinforcement learning without exploration, in: Proc. 36th Int. Conf. Mach. Learn. Vol. 97, 2019, pp. 2052\u20132062."},{"key":"10.1016\/j.knosys.2024.112474_b29","unstructured":"A. Kumar, J. Fu, M. Soh, G. Tucker, S. Levine, Stabilizing off-policy Q-learning via bootstrapping error reduction, in: Proc. Adv. Neural Inf. Process. Syst. Vol. 32, 2019."},{"key":"10.1016\/j.knosys.2024.112474_b30","unstructured":"R. Lowe, Y. WU, A. Tamar, J. Harb, O. Pieter Abbeel, I. Mordatch, Multi-agent actor-critic for mixed cooperative-competitive environments, in: Proc. Adv. Neural Inf. Process. Syst. Vol. 30, 2017."},{"key":"10.1016\/j.knosys.2024.112474_b31","unstructured":"S. Fujimoto, H. van Hoof, D. Meger, Addressing function approximation error in actor-critic methods, in: Proc. 35th Int. Conf. Mach. Learn. Vol. 80, 2018, pp. 1587\u20131596."},{"key":"10.1016\/j.knosys.2024.112474_b32","series-title":"Proc. Adv. Neural Inf. Process. Syst. Vol. 23","article-title":"Double Q-learning","author":"Hasselt","year":"2010"},{"key":"10.1016\/j.knosys.2024.112474_b33","doi-asserted-by":"crossref","unstructured":"H.v. Hasselt, A. Guez, D. Silver, Deep reinforcement learning with double Q-learning, in: Proc. 16th AAAI Conf. Artif. Intell., 2016, pp. 2094\u20132100.","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"10.1016\/j.knosys.2024.112474_b34","doi-asserted-by":"crossref","first-page":"79446","DOI":"10.1109\/ACCESS.2019.2922706","article-title":"Stochastic double deep Q-network","volume":"7","author":"Lv","year":"2019","journal-title":"IEEE Access"},{"issue":"1","key":"10.1016\/j.knosys.2024.112474_b35","doi-asserted-by":"crossref","first-page":"52","DOI":"10.1109\/TNNLS.2021.3089493","article-title":"SMIX(\u03bb): Enhancing centralized value functions for cooperative multiagent reinforcement learning","volume":"34","author":"Yao","year":"2023","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2024.112474_b36","unstructured":"S. Hu, F. Zhu, X. Chang, X. Liang, UPDeT: Universal Multi-agent RL via Policy Decoupling with Transformers, in: Proc. Int. Conf. Learn. Represent., 2021."},{"issue":"6","key":"10.1016\/j.knosys.2024.112474_b37","doi-asserted-by":"crossref","first-page":"1367","DOI":"10.1109\/TCYB.2016.2544866","article-title":"FMRQ\u2013a multiagent reinforcement learning algorithm for fully cooperative tasks","volume":"47","author":"Zhang","year":"2017","journal-title":"IEEE Trans. Cybern."},{"key":"10.1016\/j.knosys.2024.112474_b38","doi-asserted-by":"crossref","first-page":"9261","DOI":"10.1007\/s10489-022-03924-3","article-title":"Transform networks for cooperative multi-agent deep reinforcement learning","volume":"53","author":"Wang","year":"2023","journal-title":"Appl. Intell."},{"key":"10.1016\/j.knosys.2024.112474_b39","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.neunet.2022.09.012","article-title":"A leader-following paradigm based deep reinforcement learning method for multi-agent cooperation games","volume":"156","author":"Zhang","year":"2022","journal-title":"Neural Netw."},{"key":"10.1016\/j.knosys.2024.112474_b40","unstructured":"K. Son, D. Kim, W.J. Kang, D.E. Hostallero, Y. Yi, QTRAN: learning to factorize with transformation for cooperative multi-agent reinforcement learning, in: Proc. 36th Int. Conf. Mach. Learn. Vol. 97, 2019, pp. 5887\u20135896."},{"key":"10.1016\/j.knosys.2024.112474_b41","unstructured":"K. Asadi, M.L. Littman, An alternative softmax operator for reinforcement learning, in: Proc. 34th Int. Conf. Mach. Learn. Vol. 70, 2017, pp. 243\u2013252."},{"year":"2019","series-title":"The StarCraft multi-agent challenge","author":"Mikayel","key":"10.1016\/j.knosys.2024.112474_b42"},{"key":"10.1016\/j.knosys.2024.112474_b43","unstructured":"T. Rashid, G. Farquhar, B. Peng, S. Whiteson, Weighted QMIX: expanding monotonic value function factorisation for deep multi-agent reinforcement learning, in: Proc. Adv. Neural Inf. Process. Syst. Vol. 33, 2020, pp. 10199\u201310210."},{"key":"10.1016\/j.knosys.2024.112474_b44","unstructured":"J. Wang, Z. Ren, T. Liu, Y. Yu, C. Zhang, QPLEX: duplex dueling multi-agent Q-learning, in: Proc. Int. Conf. Learn. Represent., 2021."}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705124011080?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705124011080?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,1,19]],"date-time":"2025-01-19T20:53:31Z","timestamp":1737320011000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705124011080"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11]]},"references-count":44,"alternative-id":["S0950705124011080"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2024.112474","relation":{},"ISSN":["0950-7051"],"issn-type":[{"type":"print","value":"0950-7051"}],"subject":[],"published":{"date-parts":[[2024,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"VAOS: Enhancing the stability of cooperative multi-agent policy learning","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2024.112474","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"112474"}}