{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T18:59:20Z","timestamp":1772823560567,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T00:00:00Z","timestamp":1758672000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T00:00:00Z","timestamp":1758672000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23B2064"],"award-info":[{"award-number":["U23B2064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176263"],"award-info":[{"award-number":["62176263"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017596","name":"Natural Science Basic Research Program of Shaanxi Province","doi-asserted-by":"publisher","award":["2022KJXX-99"],"award-info":[{"award-number":["2022KJXX-99"]}],"id":[{"id":"10.13039\/501100017596","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. King Saud Univ. Comput. Inf. Sci."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s44443-025-00215-y","type":"journal-article","created":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T16:10:23Z","timestamp":1758730223000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient evolutionary curriculum learning for scalable multi-agent reinforcement learning"],"prefix":"10.1007","volume":"37","author":[{"given":"Chao","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8372-0307","authenticated-orcid":false,"given":"Yanfei","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jieling","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhong","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chengjin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,24]]},"reference":[{"issue":"12","key":"215_CR1","doi-asserted-by":"publisher","first-page":"5928","DOI":"10.1109\/TAI.2024.3428519","volume":"5","author":"K Acharya","year":"2024","unstructured":"Acharya K, Velasquez A, Song HH (2024) A survey on symbolic knowledge distillation of large language models. IEEE Trans Artif Intell 5(12):5928\u20136594. https:\/\/doi.org\/10.1109\/TAI.2024.3428519","journal-title":"IEEE Trans Artif Intell"},{"key":"215_CR2","doi-asserted-by":"publisher","unstructured":"Agarwal A, Kumar S, Sycara K, et al (2020) Learning transferable cooperative behavior in multi-agent team. International Conference on Autonomous Agents and Multiagent Systems, pp 1741\u20131743. https:\/\/doi.org\/10.48550\/arXiv.1906.01202","DOI":"10.48550\/arXiv.1906.01202"},{"issue":"10","key":"215_CR3","doi-asserted-by":"publisher","first-page":"19817","DOI":"10.1109\/TITS.2022.3160673","volume":"23","author":"L Anzalone","year":"2022","unstructured":"Anzalone L, Barra P, Barra S et al (2022) An end-to-end curriculum learning approach for autonomous driving scenarios. IEEE Trans Intell Transp Syst 23(10):19817\u201319826. https:\/\/doi.org\/10.1109\/TITS.2022.3160673","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"215_CR4","doi-asserted-by":"publisher","first-page":"120495","DOI":"10.1016\/j.eswa.2023.120495","volume":"231","author":"KS Ashish","year":"2023","unstructured":"Ashish KS, Gopinatha P, Sohom C (2023) Reinforcement learning algorithms: A brief survey. Expert Syst Appl 231:120495. https:\/\/doi.org\/10.1016\/j.eswa.2023.120495","journal-title":"Expert Syst Appl"},{"key":"215_CR5","doi-asserted-by":"publisher","unstructured":"Batra S, Huang Z H, Petrenko A, et al (2021) Decentralized control of quadrotor swarms with end-to- end deep reinforcement learning. https:\/\/doi.org\/10.48550\/arXiv.2109.07735","DOI":"10.48550\/arXiv.2109.07735"},{"key":"215_CR6","doi-asserted-by":"publisher","unstructured":"Bengio Y, Louradour J, Collobert R, et al (2009) Curriculum learning. Proceedings of the 26th International Conference on Machine Learning, pp 41\u201348. https:\/\/doi.org\/10.1145\/1553374.1553380","DOI":"10.1145\/1553374.1553380"},{"key":"215_CR7","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"DS Bernstein","year":"2002","unstructured":"Bernstein DS, Givan R, Immerman N et al (2002) The complexity of decentralized control of Markov decision processes. Math Oper Res 27:819\u2013840. https:\/\/doi.org\/10.1287\/moor.27.4.819.297","journal-title":"Math Oper Res"},{"issue":"1","key":"215_CR8","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/0010-0277(93)90058-4","volume":"48","author":"JL Elman","year":"1993","unstructured":"Elman JL (1993) Learning and development in neural networks: the importance of starting small. Cognition 48(1):71\u201399. https:\/\/doi.org\/10.1016\/0010-0277(93)90058-4","journal-title":"Cognition"},{"key":"215_CR9","doi-asserted-by":"publisher","unstructured":"Gao Z, Xu K, Ding B, et al (2021) KnowSR: Knowledge sharing among homogeneous agents in multi-agent reinforcement learning. https:\/\/doi.org\/10.48550\/arXiv.2105.11611","DOI":"10.48550\/arXiv.2105.11611"},{"key":"215_CR10","doi-asserted-by":"publisher","unstructured":"Haarnoja T, Zhou A, Abbeel P, et al (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. International conference on machine learning, pp 1861\u20131870. https:\/\/doi.org\/10.48550\/arXiv.1801.01290","DOI":"10.48550\/arXiv.1801.01290"},{"key":"215_CR11","doi-asserted-by":"publisher","unstructured":"Iqbal S, Witt C D, Peng B, et al (2021) Randomized entity-wise factorization for multi-agent reinforcement learning. International Conference on Machine Learning, pp 4596\u20134606. https:\/\/doi.org\/10.48550\/arXiv.2006.04222","DOI":"10.48550\/arXiv.2006.04222"},{"key":"215_CR12","doi-asserted-by":"publisher","unstructured":"Khan A, Tolstaya E, Ribeiro A, et al (2019) Graph policy gradients for large scale robot control. https:\/\/doi.org\/10.48550\/arXiv.1907.03822","DOI":"10.48550\/arXiv.1907.03822"},{"key":"215_CR13","doi-asserted-by":"publisher","first-page":"128943","DOI":"10.1109\/ACCESS.2022.3225431","volume":"10","author":"N Kodama","year":"2022","unstructured":"Kodama N, Harada T, Miyazaki K (2022) Traffic signal control system using deep reinforcement learning with emphasis on reinforcing successful experiences. IEEE Access 10:128943\u2013128950. https:\/\/doi.org\/10.1109\/ACCESS.2022.3225431","journal-title":"IEEE Access"},{"key":"215_CR14","doi-asserted-by":"publisher","first-page":"21433","DOI":"10.1007\/s10489-023-04652-y","volume":"53","author":"X Li","year":"2023","unstructured":"Li X, Li J, Shi H (2023) A multi-agent reinforcement learning method with curriculum transfer for large-scale dynamic traffic signal control. Appl Intell 53:21433\u201321447. https:\/\/doi.org\/10.1007\/s10489-023-04652-y","journal-title":"Appl Intell"},{"issue":"2","key":"215_CR15","doi-asserted-by":"publisher","first-page":"447","DOI":"10.23919\/JSEE.2022.000045","volume":"33","author":"WZ Liu","year":"2022","unstructured":"Liu WZ, Dong L, Liu J et al (2022) Knowledge transfer in multi-agent reinforcement learning with incremental number of agents. J Syst Eng Electron 33(2):447\u2013460. https:\/\/doi.org\/10.23919\/JSEE.2022.000045","journal-title":"J Syst Eng Electron"},{"key":"215_CR16","unstructured":"Liu Y F, Li C, Wang Z, et al (2025) Research Progress on Multi-Agent Deep Reinforcement Learning and Scalability. Computer Engineering and Applications, 61(04): 1\u201324. http:\/\/cea.ceaj.org\/CN\/10.3778\/j.issn.1002-8331.2407-0034"},{"key":"215_CR17","doi-asserted-by":"publisher","unstructured":"Long Q, Zhou Z H, Gupta A, et al (2020) Evolutionary population curriculum for scaling multi-agent reinforcement learning. https:\/\/doi.org\/10.48550\/arXiv.2003.10423","DOI":"10.48550\/arXiv.2003.10423"},{"key":"215_CR18","doi-asserted-by":"publisher","unstructured":"Lowe R, Wu Y, Tamar A (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. Proceedings of Conference on Neural Information Processing Systems, pp 6379\u20136390. https:\/\/doi.org\/10.48550\/arXiv.1706.02275","DOI":"10.48550\/arXiv.1706.02275"},{"key":"215_CR19","doi-asserted-by":"publisher","DOI":"10.1007\/s44443-025-00016-3","author":"J Luo","year":"2025","unstructured":"Luo J, Li F, Jiao J (2025) A dynamic multiobjective recommendation method based on soft actor-critic with discrete actions. J King Saud Univ Comput Inf Sci. https:\/\/doi.org\/10.1007\/s44443-025-00016-3","journal-title":"J King Saud Univ Comput Inf Sci"},{"key":"215_CR20","doi-asserted-by":"publisher","unstructured":"Mao W C, Qiu H R, Wang C, et al (2024) Multi-agent meta-reinforcement learning: sharper convergence rates with task similarity. Proceedings of the 37th International Conference on Neural Information Processing Systems, pp 66556\u201366570. https:\/\/doi.org\/10.5555\/3666122.3669028","DOI":"10.5555\/3666122.3669028"},{"key":"215_CR21","doi-asserted-by":"publisher","unstructured":"Max J, Valentin D, Simon O, et al (2017) Population Based Training of Neural Networks. https:\/\/doi.org\/10.48550\/arXiv.1711.09846","DOI":"10.48550\/arXiv.1711.09846"},{"key":"215_CR22","doi-asserted-by":"publisher","unstructured":"Mehta D, Dahiya N, Atre I, et al (2025) Reinforcement Learning Based Recommendation System: An In-Depth Review of Models and their Limitations. 2025 Fourth International Conference on Power, Control and Computing Technologies (ICPC2T), pp 530\u2013535. https:\/\/doi.org\/10.1109\/ICPC2T63847.2025.10958609","DOI":"10.1109\/ICPC2T63847.2025.10958609"},{"key":"215_CR23","doi-asserted-by":"publisher","unstructured":"Mordatch I, Abbeel P (2018) Emergence of Grounded Compositional Language in Multi-Agent Populations. Proceedings of the AAAI Conference on Artificial Intelligence, pp 32(1): 1495\u20131502. https:\/\/doi.org\/10.5555\/3504035.3504218","DOI":"10.5555\/3504035.3504218"},{"key":"215_CR24","doi-asserted-by":"publisher","unstructured":"Narvekar S, Sinapov J, Leonetti M, et al (2016) Source task creation for curriculum learning. Proceedings of the 2016 International Conference on Autonomous Agents and Multiagent Systems, pp 566\u2013574. https:\/\/doi.org\/10.5555\/2936924.2937007","DOI":"10.5555\/2936924.2937007"},{"issue":"1","key":"215_CR25","doi-asserted-by":"publisher","first-page":"557","DOI":"10.1109\/TIV.2022.3185303","volume":"8","author":"O Natan","year":"2023","unstructured":"Natan O, Miura J (2023) End-to-end autonomous driving with semantic depth cloud mapping and multi-agent. IEEE Trans Intell Veh 8(1):557\u2013571. https:\/\/doi.org\/10.1109\/TIV.2022.3185303","journal-title":"IEEE Trans Intell Veh"},{"key":"215_CR26","doi-asserted-by":"publisher","unstructured":"Navara S, Stone P (2018) Learning curriculum policies for reinforcement learning. https:\/\/doi.org\/10.48550\/arXiv.1812.00285","DOI":"10.48550\/arXiv.1812.00285"},{"key":"215_CR27","doi-asserted-by":"publisher","unstructured":"Pu Y, Wang S C, Yang R, et al (2021) Decomposed Soft Actor-Critic Method for Cooperative Multi-Agent Reinforcement Learning. https:\/\/doi.org\/10.48550\/arXiv.2104.06655","DOI":"10.48550\/arXiv.2104.06655"},{"key":"215_CR28","doi-asserted-by":"publisher","unstructured":"Rashid T, Samvelyan M, Witt C S, et al (2018) Qmix: Monotonic value function factorization for deep multiagent reinforcement learning. International Conference on Machine Learning, pp 4292\u20134301. https:\/\/doi.org\/10.48550\/arXiv.1803.11485","DOI":"10.48550\/arXiv.1803.11485"},{"key":"215_CR29","doi-asserted-by":"publisher","unstructured":"Samvelyan M, Rashid T, Schroeder de Witt C, et al (2019) The StarCraft Multi-Agent Challenge. Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems, pp 2186\u20132188. https:\/\/doi.org\/10.5555\/3306127.3332052","DOI":"10.5555\/3306127.3332052"},{"key":"215_CR30","doi-asserted-by":"publisher","unstructured":"Sanmit N, Bei P, Matteo L, et al (2020) Curriculum Learning for Reinforcement Learning Domains: A Framework and Survey. Journal of Machine Learning Research 21(181): 1\u201350. https:\/\/doi.org\/10.48550\/arXiv.2003.04960","DOI":"10.48550\/arXiv.2003.04960"},{"key":"215_CR31","doi-asserted-by":"publisher","unstructured":"Schlichting M R, Notter S, Fichter W (2021) LSTM-Based Spatial Encoding: Explainable Path Planning for Time-Variant Multi-Agent Systems. AIAA Scitech 2021 Forum. Reston, pp 1860. https:\/\/doi.org\/10.2514\/6.2021-1860","DOI":"10.2514\/6.2021-1860"},{"issue":"2","key":"215_CR32","doi-asserted-by":"publisher","first-page":"3221","DOI":"10.1109\/LRA.2020.2974695","volume":"5","author":"SH Semnani","year":"2020","unstructured":"Semnani SH, Liu H, Everett M et al (2020) Multi-agent motion planning for dense and dynamic environments via deep reinforcement learning. IEEE Robot Autom Lett 5(2):3221\u20133226. https:\/\/doi.org\/10.1109\/LRA.2020.2974695","journal-title":"IEEE Robot Autom Lett"},{"key":"215_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.124379","volume":"255","author":"M Seong","year":"2024","unstructured":"Seong M, Jo O, Shin K (2024) Age of information minimization in UAV-assisted data harvesting networks by multi-agent deep reinforcement curriculum learning. Expert Syst Appl 255:124379. https:\/\/doi.org\/10.1016\/j.eswa.2024.124379","journal-title":"Expert Syst Appl"},{"issue":"1","key":"215_CR34","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1109\/TETCI.2018.2823329","volume":"3","author":"K Shao","year":"2019","unstructured":"Shao K, Zhu Y, Zhao D (2019) StarCraft micromanagement with reinforcement learning and curriculum transfer learning. IEEE Trans Emerg Top Comput Intell 3(1):73\u201384. https:\/\/doi.org\/10.1109\/TETCI.2018.2823329","journal-title":"IEEE Trans Emerg Top Comput Intell"},{"issue":"3","key":"215_CR35","doi-asserted-by":"publisher","first-page":"4552","DOI":"10.1109\/LRA.2021.3068952","volume":"6","author":"CD Souza","year":"2021","unstructured":"Souza CD, Newbury R, Cosgun A et al (2021) Decentralized multi-agent pursuit using deep reinforcement learning. IEEE Robot Autom Lett 6(3):4552\u20134559. https:\/\/doi.org\/10.1109\/LRA.2021.3068952","journal-title":"IEEE Robot Autom Lett"},{"key":"215_CR36","doi-asserted-by":"publisher","unstructured":"Sunehag P, Lever G, Gruslys A, et al (2018) Value-Decomposition Networks For Cooperative Multi-Agent Learning Based On Team Reward. Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems, pp 2085\u20132087. https:\/\/doi.org\/10.5555\/3237383.3238080","DOI":"10.5555\/3237383.3238080"},{"key":"215_CR37","doi-asserted-by":"publisher","unstructured":"Tseng W C, Wang T H, Chen L Y, et al (2022) offline multiagent reinforcement learning with knowledge distillation. Proceedings of the 36th International Conference on Neural Information Processing Systems, New Orleans, LA, USA, pp 226\u2013237. https:\/\/doi.org\/10.5555\/3600270.3600287","DOI":"10.5555\/3600270.3600287"},{"key":"215_CR38","doi-asserted-by":"publisher","unstructured":"Vettoruzzo A, Bouguelia M-R, Vanschoren, et al (2024) Advances and Challenges in Meta-Learning: A Technical Review. IEEE Transactions on Pattern Analysis and Machine Intelligence 46(7): 4763\u20134779. https:\/\/doi.org\/10.1109\/TPAMI.2024.3357847","DOI":"10.1109\/TPAMI.2024.3357847"},{"key":"215_CR39","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM et al (2019) Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575:350\u2013354. https:\/\/doi.org\/10.1038\/s41586-019-1724-z","journal-title":"Nature"},{"issue":"6","key":"215_CR40","doi-asserted-by":"publisher","first-page":"3048","DOI":"10.1109\/TPAMI.2021.3055564","volume":"44","author":"L Wang","year":"2022","unstructured":"Wang L, Yoon K-J (2022) Knowledge distillation and student-teacher learning for visual intelligence: a review and new outlooks. IEEE Trans Pattern Anal Mach Intell 44(6):3048\u20133068. https:\/\/doi.org\/10.1109\/TPAMI.2021.3055564","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"9","key":"215_CR41","doi-asserted-by":"publisher","first-page":"4555","DOI":"10.1109\/TPAMI.2021.3069908","volume":"44","author":"X Wang","year":"2022","unstructured":"Wang X, Chen Y, Zhu W (2022) A survey on curriculum learning. IEEE Trans Pattern Anal Mach Intell 44(9):4555\u20134576. https:\/\/doi.org\/10.1109\/TPAMI.2021.3069908","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"215_CR42","doi-asserted-by":"publisher","unstructured":"Wang W, Yang T, Liu Y, et al (2020) From few to more: Large-scale dynamic multiagent curriculum learning. Proceedings of the AAAI Conference on Artificial Intelligence, pp 7293\u20137300. https:\/\/doi.org\/10.48550\/arXiv.1909.02790","DOI":"10.48550\/arXiv.1909.02790"},{"key":"215_CR43","doi-asserted-by":"publisher","unstructured":"Wang L X, Yang Z Y, Wang, Z Y (2020) Breaking the curse of many agents: provable mean embedding q-iteration for mean-field reinforcement learning. Proceedings of the 37th International Conference on Machine Learning, pp 10092\u201310103. https:\/\/doi.org\/10.5555\/3524938.3525874","DOI":"10.5555\/3524938.3525874"},{"issue":"5","key":"215_CR44","doi-asserted-by":"publisher","first-page":"2701","DOI":"10.1109\/TNNLS.2021.3107742","volume":"34","author":"C Xiao","year":"2023","unstructured":"Xiao C, Lu P, He Q (2023) Flying through a narrow gap using end-to-end deep reinforcement learning augmented with curriculum learning and Sim2Real. IEEE Trans Neural Netw Learn Syst 34(5):2701\u20132708. https:\/\/doi.org\/10.1109\/TNNLS.2021.3107742","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"2","key":"215_CR45","doi-asserted-by":"publisher","first-page":"1260","DOI":"10.1109\/TII.2021.3094207","volume":"18","author":"C Yan","year":"2022","unstructured":"Yan C, Wang C, Xiang XJ et al (2022a) Deep reinforcement learning of collision-free flocking policies for multiple fixed-wing UAVs using local situation maps. IEEE Trans Ind Inform 18(2):1260\u20131270. https:\/\/doi.org\/10.1109\/TII.2021.3094207","journal-title":"IEEE Trans Ind Inform"},{"key":"215_CR46","doi-asserted-by":"publisher","DOI":"10.1016\/j.ast.2022.108091","volume":"133","author":"C Yan","year":"2023","unstructured":"Yan C, Xiang XJ, Wang C et al (2023) PASCAL: population-specific curriculum-based MADRL for collision-free flocking with large-scale fixed-wing UAV swarms. Aerosp Sci Technol 133:108091. https:\/\/doi.org\/10.1016\/j.ast.2022.108091","journal-title":"Aerosp Sci Technol"},{"key":"215_CR47","doi-asserted-by":"publisher","unstructured":"Yan C, Xiang X J, Xu X, et al (2022) A survey on scalability and transferability of multi-agent deep reinforcement learning. Control and Decision 37(12): 3083\u20133102. https:\/\/doi.org\/10.13195\/j.kzyjc.2022.0044","DOI":"10.13195\/j.kzyjc.2022.0044"},{"key":"215_CR48","doi-asserted-by":"publisher","unstructured":"Yu C, Velu A, Vinitsky E, et al (2021) The surprising effectiveness of PPO in cooperative, multi-agent games. https:\/\/doi.org\/10.48550\/arXiv.2103.01955","DOI":"10.48550\/arXiv.2103.01955"},{"key":"215_CR49","doi-asserted-by":"publisher","unstructured":"Yu C, Velu A, Vinitsky E, et al (2022) The surprising effectiveness of PPO in cooperative multiagent games. Proceedings of the 36th International Conference on Neural Information Processing Systems, pp 24611\u201324624. https:\/\/doi.org\/10.5555\/3600270.3602057","DOI":"10.5555\/3600270.3602057"},{"key":"215_CR50","doi-asserted-by":"publisher","first-page":"5443","DOI":"10.1109\/TSP.2022.3222734","volume":"70","author":"XY Zhang","year":"2022","unstructured":"Zhang XY, Hu C, He B et al (2022a) Distributed reptile algorithm for meta-learning over multi-agent systems. IEEE Trans Signal Process 70:5443\u20135456. https:\/\/doi.org\/10.1109\/TSP.2022.3222734","journal-title":"IEEE Trans Signal Process"},{"issue":"1","key":"215_CR51","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1109\/LRA.2023.3328448","volume":"9","author":"H Zhang","year":"2024","unstructured":"Zhang H, Zhang X, Feng Z et al (2024) Heterogeneous multi-robot cooperation with asynchronous multi-agent reinforcement learning. IEEE Robot Autom Lett 9(1):159\u2013166. https:\/\/doi.org\/10.1109\/LRA.2023.3328448","journal-title":"IEEE Robot Autom Lett"},{"key":"215_CR52","doi-asserted-by":"publisher","unstructured":"Zhang K Q, Yang Z R, Ba\u015far (2019) Multi-Agent Reinforcement Learning: A Selective Overview of Theories and Algorithms. https:\/\/doi.org\/10.48550\/arXiv.1911.10635","DOI":"10.48550\/arXiv.1911.10635"},{"key":"215_CR53","doi-asserted-by":"publisher","unstructured":"Zhang Z, Wang X H, Zhang Q R, et al (2022) Multi-robot cooperative pursuit via potential field-enhanced reinforcement learning. 2022 International Conference on Robotics and Automation (ICRA), pp 8808\u20138814. https:\/\/doi.org\/10.1109\/ICRA46639.2022.9812083","DOI":"10.1109\/ICRA46639.2022.9812083"}],"container-title":["Journal of King Saud University Computer and Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44443-025-00215-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s44443-025-00215-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44443-025-00215-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T22:03:03Z","timestamp":1761775383000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s44443-025-00215-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,24]]},"references-count":53,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["215"],"URL":"https:\/\/doi.org\/10.1007\/s44443-025-00215-y","relation":{},"ISSN":["1319-1578","2213-1248"],"issn-type":[{"value":"1319-1578","type":"print"},{"value":"2213-1248","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,24]]},"assertion":[{"value":"2 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 July 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 September 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"243"}}