{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T04:06:00Z","timestamp":1743825960360,"version":"3.40.3"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T00:00:00Z","timestamp":1742860800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T00:00:00Z","timestamp":1742860800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s11432-024-4223-2","type":"journal-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T08:49:29Z","timestamp":1743756569000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Graph-based strategy evaluation for large-scale multiagent reinforcement learning"],"prefix":"10.1007","volume":"68","author":[{"given":"Yiyun","family":"Sun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meiqin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Senlin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ronghao","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shanling","family":"Dong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,3,25]]},"reference":[{"key":"4223_CR1","doi-asserted-by":"crossref","first-page":"014201","DOI":"10.1007\/s11432-017-9263-6","volume":"61","author":"H Fang","year":"2018","unstructured":"Fang H, Shang C S, Chen J. An optimization-based shared control framework with applications in multi-robot systems. Sci China Inf Sci, 2018, 61: 014201","journal-title":"Sci China Inf Sci"},{"key":"4223_CR2","doi-asserted-by":"crossref","first-page":"3259","DOI":"10.1109\/TSG.2016.2629450","volume":"9","author":"B J Claessens","year":"2016","unstructured":"Claessens B J, Vrancx P, Ruelens F. Convolutional neural networks for automatic state-time feature extraction in reinforcement learning applied to residential load control. IEEE Trans Smart Grid, 2016, 9: 3259\u20133269","journal-title":"IEEE Trans Smart Grid"},{"key":"4223_CR3","doi-asserted-by":"crossref","first-page":"162204","DOI":"10.1007\/s11432-020-3148-x","volume":"65","author":"S C Huo","year":"2022","unstructured":"Huo S C, Huang D L, Zhang Y. Secure output synchronization of heterogeneous multi-agent systems against false data injection attacks. Sci China Inf Sci, 2022, 65: 162204","journal-title":"Sci China Inf Sci"},{"key":"4223_CR4","doi-asserted-by":"crossref","first-page":"1990","DOI":"10.1109\/TPWRS.2019.2948132","volume":"35","author":"H Xu","year":"2019","unstructured":"Xu H, Dominguez-Garcia A D, Sauer P W. Optimal tap setting of voltage regulation transformers using batch reinforcement learning. IEEE Trans Power Syst, 2019, 35: 1990\u20132001","journal-title":"IEEE Trans Power Syst"},{"key":"4223_CR5","doi-asserted-by":"crossref","first-page":"132302","DOI":"10.1007\/s11432-023-3906-3","volume":"67","author":"X H Zhou","year":"2024","unstructured":"Zhou X H, Xiong J, Zhao H T, et al. Joint UAV trajectory and communication design with heterogeneous multi-agent reinforcement learning. Sci China Inf Sci, 2024, 67: 132302","journal-title":"Sci China Inf Sci"},{"key":"4223_CR6","first-page":"3271","volume-title":"Proceedings of Advance Neural Information Processing Systems","author":"J H Wang","year":"2021","unstructured":"Wang J H, Xu W K, Gu Y J, et al. Multi-agent reinforcement learning for active voltage control on power distribution networks. In: Proceedings of Advance Neural Information Processing Systems, 2021. 3271\u20133284"},{"key":"4223_CR7","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/s41598-019-45619-9","volume":"9","author":"S Omidshafiei","year":"2019","unstructured":"Omidshafiei S, Papadimitriou C, Piliouras G, et al. \u03b1-Rank: multi-agent evaluation by evolution. Sci Rep, 2019, 9: 1\u201329","journal-title":"Sci Rep"},{"key":"4223_CR8","doi-asserted-by":"crossref","first-page":"4939","DOI":"10.3390\/electronics12244939","volume":"12","author":"K Liang","year":"2023","unstructured":"Liang K, Zhang G Q, Guo J H, et al. An actor-critic hierarchical reinforcement learning model for course recommendation. Electronics, 2023, 12: 4939\u20134351","journal-title":"Electronics"},{"key":"4223_CR9","volume-title":"Proceedings of International Conference on Learning Representations","author":"P Muller","year":"2020","unstructured":"Muller P, Omidshafiei S, Rowland M, et al. A generalized training approach for multi-agent learning. In: Proceedings of International Conference on Learning Representations, 2020"},{"key":"4223_CR10","first-page":"1575","volume-title":"Proceedings of the 19th International Conference on Autonomous Agents and Multiagent Systems","author":"Y Yang","year":"2020","unstructured":"Yang Y, Tutunov R, Sakulwongtana P, et al. \u03b1\u03b1-Rank: practically scaling \u03b1-Rank through stochastic optimisation. In: Proceedings of the 19th International Conference on Autonomous Agents and Multiagent Systems, 2020. 1575\u20131583"},{"key":"4223_CR11","first-page":"32","volume-title":"Proceedings of Advance Neural Information Processing Systems","author":"M Rowland","year":"2019","unstructured":"Rowland M, Omidshafiei S, Tuyls K, et al. Multi-agent evaluation under incomplete information. In: Proceedings of Advance Neural Information Processing Systems, 2019. 32\u201338"},{"key":"4223_CR12","first-page":"19","volume-title":"Proceedings of Advance Neural Information Processing Systems","author":"R Herbrich","year":"2006","unstructured":"Herbrich R, Minka T, Graepel T. TrueSkill: a Bayesian skill rating system. In: Proceedings of Advance Neural Information Processing Systems, 2006. 19\u201326"},{"key":"4223_CR13","first-page":"2870","volume-title":"Proceedings of International Conference on Machine Learning","author":"Y Du","year":"2021","unstructured":"Du Y, Yan X, Chen X, et al. Estimating \u03b1-Rank from a few entries with low rank matrix completion. In: Proceedings of International Conference on Machine Learning, 2021. 2870\u20132879"},{"key":"4223_CR14","first-page":"5673","volume-title":"Proceedings of International Conference on Machine Learning","author":"T Rashid","year":"2021","unstructured":"Rashid T, Zhang C, Ciosek K. Estimating a-Rank by maximizing information gain. In: Proceedings of International Conference on Machine Learning, 2021. 5673\u20135681"},{"key":"4223_CR15","doi-asserted-by":"crossref","first-page":"035102","DOI":"10.1103\/PhysRevE.74.035102","volume":"74","author":"M B Hastings","year":"2006","unstructured":"Hastings M B. Community detection as an inference problem. Phys Rev E, 2006, 74: 035102","journal-title":"Phys Rev E"},{"key":"4223_CR16","first-page":"3670","volume-title":"Proceedings of International Joint Conference on Artificial Intelligence","author":"C Wang","year":"2019","unstructured":"Wang C, Pan S R, Hu R Q, et al. Attributed graph clustering: a deep attentional embedding approach. In: Proceedings of International Joint Conference on Artificial Intelligence, 2019. 3670\u20133676"},{"key":"4223_CR17","first-page":"536","volume-title":"Proceedings of International Conference on Machine Learning","author":"H B McMahan","year":"2003","unstructured":"McMahan H B, Gordon G J, Blum A. Planning in the presence of cost functions controlled by an adversary. In: Proceedings of International Conference on Machine Learning, 2003. 536\u2013543"},{"key":"4223_CR18","doi-asserted-by":"crossref","first-page":"57","DOI":"10.2307\/2951778","volume":"61","author":"H P Young","year":"1993","unstructured":"Young H P. The evolution of conventions. Econometrica, 1993, 61: 57\u201384","journal-title":"Econometrica"},{"key":"4223_CR19","doi-asserted-by":"crossref","first-page":"1898","DOI":"10.1109\/TAC.2021.3085171","volume":"67","author":"R Yan","year":"2022","unstructured":"Yan R, Duan X M, Shi Z Y, et al. Policy evaluation and seeking for multiagent reinforcement learning via best response. IEEE Trans Automat Contr, 2022, 67: 1898\u20131913","journal-title":"IEEE Trans Automat Contr"},{"key":"4223_CR20","doi-asserted-by":"crossref","first-page":"4294","DOI":"10.1109\/TAC.2018.2817161","volume":"63","author":"B Swenson","year":"2018","unstructured":"Swenson B, Eksin C, Kar S, et al. Distributed inertial best-response dynamics. IEEE Trans Automat Contr, 2018, 63: 4294\u20134300","journal-title":"IEEE Trans Automat Contr"},{"key":"4223_CR21","first-page":"142","volume-title":"Proceedings of Annual IEEE Symposium on Foundations of Computer Science","author":"M Goemans","year":"2005","unstructured":"Goemans M, Mirrokni V, Vetta A. Sink equilibria and convergence. In: Proceedings of Annual IEEE Symposium on Foundations of Computer Science, 2005. 142\u2013151"},{"key":"4223_CR22","first-page":"157","volume-title":"Proceedings of International Conference on International Conference on Machine Learning","author":"W W Cohen","year":"1994","unstructured":"Cohen W W, Hirsh H. Markov games as a framework for multi-agent reinforcement learning. In: Proceedings of International Conference on International Conference on Machine Learning, 1994. 157\u2013163"},{"key":"4223_CR23","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1073\/pnas.36.1.48","volume":"36","author":"J F Nash","year":"1950","unstructured":"Nash J F. Equilibrium points in n-person games. Proc Natl Acad Sci USA, 1950, 36: 48\u201349","journal-title":"Proc Natl Acad Sci USA"},{"key":"4223_CR24","first-page":"212","volume-title":"Discrete-Time Markov Control Processes: Basic Optimality Criteria","author":"O Hern\u00e1ndez-Lerma","year":"2012","unstructured":"Hern\u00e1ndez-Lerma O, Lasserre J B. Discrete-Time Markov Control Processes: Basic Optimality Criteria. New York: Springer, 2012. 30: 212\u2013215"},{"key":"4223_CR25","doi-asserted-by":"crossref","first-page":"1545","DOI":"10.1109\/TAC.2016.2598476","volume":"62","author":"G Arslan","year":"2017","unstructured":"Arslan G, Yuksel S. Decentralized Q-learning for stochastic teams and games. IEEE Trans Automat Contr, 2017, 62: 1545\u20131558","journal-title":"IEEE Trans Automat Contr"},{"key":"4223_CR26","first-page":"6382","volume-title":"Proceedings of Advance Neural Information Processing Systems","author":"R Lowe","year":"2017","unstructured":"Lowe R, Wu Y, Tamar A, et al. Multi-agent actor-critic for mixed cooperative-competitive environments. In: Proceedings of Advance Neural Information Processing Systems, 2017. 6382\u20136393"},{"key":"4223_CR27","volume-title":"Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"Y Du","year":"2022","unstructured":"Du Y, Ma C, Liu Y, et al. Scalable model-based policy optimization for decentralized networked systems. In: Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, 2022"},{"key":"4223_CR28","doi-asserted-by":"crossref","first-page":"P10008","DOI":"10.1088\/1742-5468\/2008\/10\/P10008","volume":"2008","author":"V D Blondel","year":"2008","unstructured":"Blondel V D, Guillaume J L, Lambiotte R, et al. Fast unfolding of communities in large networks. J Stat Mech, 2008, 2008: P10008","journal-title":"J Stat Mech"},{"key":"4223_CR29","doi-asserted-by":"crossref","first-page":"108521","DOI":"10.1016\/j.epsr.2022.108521","volume":"213","author":"A Pigott","year":"2022","unstructured":"Pigott A, Crozier C, Baker K, et al. GridLearn: multiagent reinforcement learning for grid-aware building energy management. Electric Power Syst Res, 2022, 213: 108521","journal-title":"Electric Power Syst Res"},{"key":"4223_CR30","doi-asserted-by":"crossref","first-page":"114410","DOI":"10.1016\/j.enbuild.2024.114410","volume":"318","author":"Y Y Sun","year":"2024","unstructured":"Sun Y Y, Zhang S L, Liu M Q, et al. Energy management based on safe multi-agent reinforcement learning for smart buildings in distribution networks. Energy Buildings, 2024, 318: 114410","journal-title":"Energy Buildings"},{"key":"4223_CR31","doi-asserted-by":"crossref","first-page":"112941","DOI":"10.1016\/j.enbuild.2023.112941","volume":"286","author":"S Dey","year":"2023","unstructured":"Dey S, Marzullo T, Henze G. Inverse reinforcement learning control for building energy management. Energy Buildings, 2023, 286: 112941","journal-title":"Energy Buildings"},{"key":"4223_CR32","first-page":"20469","volume-title":"Proceedings of Advance Neural Information Processing Systems","author":"J C Jiang","year":"2022","unstructured":"Jiang J C, Lu Z Q. I2Q: a fully decentralized q-learning algorithm. In: Proceedings of Advance Neural Information Processing Systems, 2022. 20469\u201320481"},{"key":"4223_CR33","doi-asserted-by":"crossref","first-page":"204","DOI":"10.1109\/TPWRS.2022.3159825","volume":"38","author":"L F Ding","year":"2023","unstructured":"Ding L F, Lin Z Y, Shi X H, et al. Target-value-competition-based multi-agent deep reinforcement learning algorithm for distributed nonconvex economic dispatch. IEEE Trans Power Syst, 2023, 38: 204\u2013217","journal-title":"IEEE Trans Power Syst"},{"key":"4223_CR34","first-page":"7170","volume-title":"Proceedings of Chinese Control Conference","author":"Y W Wang","year":"2023","unstructured":"Wang Y W, Zhang S L, Liu M Q, et al. Multi-agent q-value mixing network with covariance matrix adaptation strategy for the voltage regulation problem. In: Proceedings of Chinese Control Conference, 2023. 7170\u20137175"},{"key":"4223_CR35","doi-asserted-by":"crossref","first-page":"103905","DOI":"10.1016\/j.artint.2023.103905","volume":"319","author":"S D Gu","year":"2023","unstructured":"Gu S D, Kuba J G, Chen Y P, et al. Safe multi-agent reinforcement learning for multi-robot control. Artif Intell, 2023, 319: 103905","journal-title":"Artif Intell"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4223-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-024-4223-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4223-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T08:50:49Z","timestamp":1743756649000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-024-4223-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,25]]},"references-count":35,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["4223"],"URL":"https:\/\/doi.org\/10.1007\/s11432-024-4223-2","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,25]]},"assertion":[{"value":"18 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 June 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 November 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 March 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"182206"}}