{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:37:16Z","timestamp":1778049436453,"version":"3.51.4"},"reference-count":171,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T00:00:00Z","timestamp":1764201600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T00:00:00Z","timestamp":1767744000000},"content-version":"vor","delay-in-days":41,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"the Key Scientific and Technological Project of the Henan Province","award":["232102211059"],"award-info":[{"award-number":["232102211059"]}]},{"name":"the Key Research Projects Funding Program for Higher Education Institutions of Henan Province","award":["24A320003"],"award-info":[{"award-number":["24A320003"]}]},{"name":"by the Key Medical Science and Technology Research Project of Henan Province under Grant","award":["SBJ202103098"],"award-info":[{"award-number":["SBJ202103098"]}]},{"name":"by the Key Medical Science and Technology Research Project of Henan Province under Grant","award":["LHGJ20220662"],"award-info":[{"award-number":["LHGJ20220662"]}]},{"name":"the Key Medical Science and Technology Research Project of Henan Province under Grant","award":["SBJ202103098"],"award-info":[{"award-number":["SBJ202103098"]}]},{"name":"the Key Medical Science and Technology Research Project of Henan Province under Grant","award":["LHGJ20220662"],"award-info":[{"award-number":["LHGJ20220662"]}]},{"name":"the Key Medical Science and Technology Research Project of Henan Province under Grant","award":["SBJ202103098"],"award-info":[{"award-number":["SBJ202103098"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"DOI":"10.1007\/s10462-025-11439-9","type":"journal-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T14:45:18Z","timestamp":1764254718000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":35,"title":["Reinforcement learning for single-agent to multi-agent systems: from basic theory to industrial application progress, a survey"],"prefix":"10.1007","volume":"59","author":[{"given":"Dehua","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Qingsong","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Meng","sequence":"additional","affiliation":[]},{"given":"Ruixue","family":"Xia","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Chunbin","family":"Qin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,27]]},"reference":[{"key":"11439_CR1","doi-asserted-by":"crossref","unstructured":"Agrawal R, Imieli\u0144ski T, Swami A (1993) Mining association rules between sets of items in large databases. In: Proceedings of the 1993 ACM SIGMOD international conference on Management of data, pp 207\u2013216","DOI":"10.1145\/170035.170072"},{"key":"11439_CR2","doi-asserted-by":"crossref","unstructured":"Ahad A, Tahir M, Sheikh MAS, et\u00a0al (2021) Optimal route selection in 5G-based smart health-care network: a reinforcement learning approach. 
In: 2021 26th IEEE Asia-Pacific conference on communications (APCC), pp 248\u2013253","DOI":"10.1109\/APCC49754.2021.9609815"},{"key":"11439_CR3","doi-asserted-by":"crossref","unstructured":"Alelaiwi A (2020) Resource allocation management in patient-to-physician communications based on deep reinforcement learning in smart healthcare services. In: 2020 IEEE International conference on multimedia & expo workshops (ICMEW), pp 1\u20135","DOI":"10.1109\/ICMEW46912.2020.9105982"},{"key":"11439_CR4","volume-title":"Machine learning: an artificial intelligence approach","author":"JR Anderson","year":"1983","unstructured":"Anderson JR (1983) Machine learning: an artificial intelligence approach, vol 2. Morgan Kaufmann, Burlington"},{"key":"11439_CR5","unstructured":"Arthur D, Vassilvitskii S (2007) K-means++: the advantages of careful seeding. In: Proceedings of the eighteenth annual ACM-SIAM symposium on discrete algorithms, vol 1, pp 1027\u20131035"},{"key":"11439_CR6","doi-asserted-by":"crossref","first-page":"65956","DOI":"10.1109\/ACCESS.2024.3398059","volume":"12","author":"GS Asha Rani","year":"2024","unstructured":"Asha Rani GS, Lal Priya PS, Jayan J et al (2024) Data-driven energy management of an electric vehicle charging station using deep reinforcement learning. IEEE Access 12:65956\u201365966","journal-title":"IEEE Access"},{"issue":"1","key":"11439_CR7","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1109\/TG.2022.3208324","volume":"16","author":"H Bai","year":"2024","unstructured":"Bai H, Shen R, Lin Y et al (2024) Lamarckian platform: pushing the boundaries of evolutionary reinforcement learning toward asynchronous commercial games. IEEE Trans Games 16(1):51\u201363","journal-title":"IEEE Trans Games"},{"issue":"5","key":"11439_CR8","first-page":"679","volume":"6","author":"R Bellman","year":"1957","unstructured":"Bellman R (1957a) A Markovian decision process. J Math Mech 6(5):679\u2013684","journal-title":"J Math Mech"},{"key":"11439_CR9","volume-title":"Dynamic programming","author":"RE Bellman","year":"1957","unstructured":"Bellman RE (1957b) Dynamic programming. Princeton University Press, Princeton"},{"issue":"3","key":"11439_CR10","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1007\/s11768-011-1005-3","volume":"9","author":"DP Bertsekas","year":"2011","unstructured":"Bertsekas DP (2011) Approximate policy iteration: a survey and some new methods. J Control Theory Appl 9(3):310\u2013335","journal-title":"J Control Theory Appl"},{"key":"11439_CR11","volume-title":"Dynamic programming and optimal control","author":"D Bertsekas","year":"2012","unstructured":"Bertsekas D (2012) Dynamic programming and optimal control, vol 4. Athena scientific, Nashua"},{"issue":"1","key":"11439_CR12","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L (2001) Random forests. Mach Learn 45(1):5\u201332","journal-title":"Mach Learn"},{"issue":"5","key":"11439_CR13","doi-asserted-by":"crossref","first-page":"4346","DOI":"10.1109\/TITS.2023.3330183","volume":"25","author":"H Cao","year":"2024","unstructured":"Cao H, Xiong H, Zeng W et al (2024) Safe reinforcement learning-based motion planning for functional mobile robots suffering uncontrollable mobile robots. 
IEEE Trans Intell Transp Syst 25(5):4346\u20134363","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"11439_CR14","unstructured":"Casta\u00f1eda AO (2016) Deep reinforcement learning variants of multi-agent learning algorithms. PhD thesis, University of Edinburgh, Edinburgh"},{"issue":"3","key":"11439_CR15","doi-asserted-by":"crossref","first-page":"542","DOI":"10.1109\/TNN.2009.2015974","volume":"20","author":"O Chapelle","year":"2009","unstructured":"Chapelle O, Scholkopf B, Zien A (2009) Semi-supervised learning (chapelle, O. et\u00a0al., eds.; 2006) [book reviews]. IEEE Trans Neural Netw 20(3):542\u2013542","journal-title":"IEEE Trans Neural Netw"},{"issue":"3","key":"11439_CR16","doi-asserted-by":"crossref","first-page":"2969","DOI":"10.1109\/TNNLS.2023.3290974","volume":"35","author":"H Chen","year":"2023","unstructured":"Chen H, Luo H, Huang B et al (2023) Transfer learning-motivated intelligent fault diagnosis designs: a survey, insights, and perspectives. IEEE Trans Neural Netw Learning Syst 35(3):2969\u20132983","journal-title":"IEEE Trans Neural Netw Learning Syst"},{"issue":"11","key":"11439_CR17","doi-asserted-by":"crossref","first-page":"15947","DOI":"10.1109\/TITS.2024.3410023","volume":"25","author":"W Chen","year":"2024","unstructured":"Chen W, Yang S, Li W et al (2024) Learning multi-intersection traffic signal control via coevolutionary multi-agent reinforcement learning. IEEE Trans Intell Transp Syst 25(11):15947\u201315963","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"4","key":"11439_CR18","doi-asserted-by":"crossref","first-page":"225","DOI":"10.3390\/wevj16040225","volume":"16","author":"J Chen","year":"2025","unstructured":"Chen J, Zhu B, Zhang M et al (2025) Multi-agent deep reinforcement learning cooperative control model for autonomous vehicle merging into platoon in highway. World Electr Veh J 16(4):225","journal-title":"World Electr Veh J"},{"key":"11439_CR19","doi-asserted-by":"crossref","unstructured":"Diprasetya MR, Pullani AN, Schwung A (2024) Sim-to-real transfer for robotics using model-free curriculum reinforcement learning. In: 2024 IEEE international conference on industrial technology (ICIT), pp 1\u20136","DOI":"10.1109\/ICIT58233.2024.10540995"},{"key":"11439_CR20","doi-asserted-by":"crossref","unstructured":"Drudi C, Fechner M, Mollura M et\u00a0al (2024) Reinforcement learning for heart failure treatment optimization in the intensive care unit. In: 2024 46th annual international conference of the IEEE Engineering in Medicine and Biology Society (EMBC), pp 1\u20134","DOI":"10.1109\/EMBC53108.2024.10781564"},{"key":"11439_CR21","unstructured":"Foerster JN, Assael YM, de\u00a0Freitas N et\u00a0al (2016) Learning to communicate with deep multi-agent reinforcement learning. In: Proceedings of the 30th international conference on neural information processing systems, Barcelona, pp 2145\u20132153"},{"key":"11439_CR22","doi-asserted-by":"crossref","unstructured":"Foerster J, Farquhar G, Afouras T et\u00a0al (2017) Counterfactual multi-agent policy gradients. arXiv:1705.08926","DOI":"10.1609\/aaai.v32i1.11794"},{"issue":"5","key":"11439_CR23","doi-asserted-by":"crossref","first-page":"826","DOI":"10.1109\/TSMC.1983.6313076","volume":"13","author":"K Fukushima","year":"1983","unstructured":"Fukushima K, Miyake S, Ito T (1983) Neocognitron: a neural network model for a mechanism of visual pattern recognition. 
IEEE Trans Syst Man Cybern 13(5):826\u2013834","journal-title":"IEEE Trans Syst Man Cybern"},{"key":"11439_CR24","volume-title":"Theoria Motus Corporum Coelestium in Sectionibus Conicis Solem Ambientium","author":"CF Gauss","year":"1809","unstructured":"Gauss CF (1809) Theoria Motus Corporum Coelestium in Sectionibus Conicis Solem Ambientium. Perthes et Besser, Hamburg"},{"key":"11439_CR25","doi-asserted-by":"crossref","first-page":"94275","DOI":"10.1109\/ACCESS.2025.3573096","volume":"13","author":"P Geranmayeh","year":"2025","unstructured":"Geranmayeh P, Grass E (2025) Optimization of beamforming and transmit power using DGN and comparison with traditional techniques. IEEE Access 13:94275\u201394285","journal-title":"IEEE Access"},{"issue":"12","key":"11439_CR26","doi-asserted-by":"crossref","first-page":"1593","DOI":"10.1039\/B907946G","volume":"5","author":"P Geurts","year":"2009","unstructured":"Geurts P, Irrthum A, Wehenkel L (2009) Supervised learning with decision tree-based methods in computational and systems biology. Mol BioSyst 5(12):1593\u20131605","journal-title":"Mol BioSyst"},{"issue":"11","key":"11439_CR27","doi-asserted-by":"crossref","first-page":"5371","DOI":"10.1109\/TAI.2024.3428510","volume":"5","author":"J Gong","year":"2024","unstructured":"Gong J, Yu N, Han F et al (2024) Energy scheduling optimization for microgrids based on partially observable Markov game. IEEE Trans Artif Intell 5(11):5371\u20135380","journal-title":"IEEE Trans Artif Intell"},{"key":"11439_CR28","doi-asserted-by":"crossref","unstructured":"Guan H (2020) Analysis on deep reinforcement learning in industrial robotic arm. In: 2020 international conference on intelligent computing and human\u2013computer interaction (ICHCI), pp 426\u2013430","DOI":"10.1109\/ICHCI51889.2020.00094"},{"key":"11439_CR29","doi-asserted-by":"crossref","unstructured":"Guan H, Gao Y, Zhao M et\u00a0al (2022) Ab-mapper: attention and bicnet based multi-agent path planning for dynamic environment. In: 2022 IEEE\/RSJ international conference on intelligent robots and systems (IROS), IEEE, pp 13799\u201313806","DOI":"10.1109\/IROS47612.2022.9981513"},{"issue":"12","key":"11439_CR30","doi-asserted-by":"crossref","first-page":"9052","DOI":"10.1109\/TPAMI.2024.3415112","volume":"46","author":"J Gui","year":"2024","unstructured":"Gui J, Chen T, Zhang J et al (2024) A survey on self-supervised learning: algorithms, applications, and future trends. IEEE Trans Pattern Anal Mach Intell 46(12):9052\u20139071","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"11439_CR31","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/335191.335372","volume":"29","author":"J Han","year":"2000","unstructured":"Han J, Pei J, Yin Y (2000) Mining frequent patterns without candidate generation. ACM SIGMOD Rec 29(2):1\u201312","journal-title":"ACM SIGMOD Rec"},{"key":"11439_CR32","unstructured":"Hausknecht MJ, Stone P (2015) Deep recurrent Q-learning for partially observable MDPs. In: AAAI fall symposia, p 141"},{"key":"11439_CR33","doi-asserted-by":"crossref","DOI":"10.1002\/0471722146","volume-title":"Applied logistic regression","author":"DW Hosmer","year":"2000","unstructured":"Hosmer DW, Lemeshow S (2000) Applied logistic regression. 
Wiley, Hoboken"},{"issue":"1","key":"11439_CR34","doi-asserted-by":"crossref","first-page":"568","DOI":"10.1109\/TIV.2023.3316196","volume":"9","author":"Y Hou","year":"2024","unstructured":"Hou Y, Zhao J, Zhang R et al (2024) UAV swarm cooperative target search: a multi-agent reinforcement learning approach. IEEE Trans Intell Veh 9(1):568\u2013578","journal-title":"IEEE Trans Intell Veh"},{"key":"11439_CR35","doi-asserted-by":"crossref","first-page":"193581","DOI":"10.1109\/ACCESS.2024.3520357","volume":"12","author":"B Hu","year":"2024","unstructured":"Hu B, Gong Y, Liang X et al (2024) Safe deep reinforcement learning-based real-time multi-energy management in combined heat and power microgrids. IEEE Access 12:193581\u2013193593","journal-title":"IEEE Access"},{"issue":"1","key":"11439_CR36","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1016\/j.datak.2010.09.002","volume":"70","author":"Z Huang","year":"2011","unstructured":"Huang Z, van der Aalst W, Lu X et al (2011) Reinforcement learning based resource allocation in business process management. Data Knowl Eng 70(1):127\u2013145","journal-title":"Data Knowl Eng"},{"key":"11439_CR37","unstructured":"Iqbal S, Sha F (2018) Actor-attention-critic for multi-agent reinforcement learning. arXiv:1810.02912"},{"key":"11439_CR38","doi-asserted-by":"crossref","unstructured":"Iskandar A, Rostum HM, Kov\u00e1cs B (2023) Using deep reinforcement learning to solve a navigation problem for a swarm robotics system. In: 2023 24th international Carpathian control conference (ICCC), pp 185\u2013189","DOI":"10.1109\/ICCC57093.2023.10178888"},{"issue":"3","key":"11439_CR39","doi-asserted-by":"crossref","first-page":"4336","DOI":"10.1109\/TNNLS.2024.3377370","volume":"36","author":"D Isla-Cernadas","year":"2025","unstructured":"Isla-Cernadas D, Fern\u00e1ndez-Delgado M, Cernadas E et al (2025) Closed-form gaussian spread estimation for small and large support vector classification. IEEE Trans Neural Netw Learn Syst 36(3):4336\u20134344","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"11","key":"11439_CR40","doi-asserted-by":"crossref","first-page":"16625","DOI":"10.1109\/TITS.2024.3416154","volume":"25","author":"Q Jiang","year":"2024","unstructured":"Jiang Q, Qin M, Zhang H et al (2024) Blindlight: high robustness reinforcement learning method to solve partially blinded traffic signal control problem. IEEE Trans Intell Transp Syst 25(11):16625\u201316641","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"1","key":"11439_CR41","doi-asserted-by":"crossref","first-page":"318","DOI":"10.26599\/TST.2024.9010013","volume":"30","author":"Y Jiang","year":"2025","unstructured":"Jiang Y, Di K, Qian R et al (2025) Optimizing risk-aware task migration algorithm among multiplex UAV groups through hybrid attention multi-agent reinforcement learning. Tsinghua Sci Technol 30(1):318\u2013330","journal-title":"Tsinghua Sci Technol"},{"key":"11439_CR42","unstructured":"Jordan MI, Kearns MJ, Solla SA (1998) Advances in neural information processing systems. In: proceedings of the 1997 conference, vol\u00a010. MIT Press"},{"key":"11439_CR43","doi-asserted-by":"crossref","unstructured":"Kaloev M, Krastev G (2023) Comprehensive review of benefits from the use of neuron connection pruning techniques during the training process of artificial neural networks in reinforcement learning: experimental simulations in atari games. 
In: 2023 7th International symposium on multidisciplinary studies and innovative technologies (ISMSIT), pp 1\u20136","DOI":"10.1109\/ISMSIT58785.2023.10304968"},{"issue":"3","key":"11439_CR44","doi-asserted-by":"crossref","first-page":"722","DOI":"10.1109\/TG.2024.3355172","volume":"16","author":"S Karimi","year":"2024","unstructured":"Karimi S, Asadi S, Payberah AH (2024) Bazigooshi: a hybrid model of reinforcement learning for generalization in gameplay. IEEE Trans Games 16(3):722\u2013734","journal-title":"IEEE Trans Games"},{"key":"11439_CR45","doi-asserted-by":"crossref","unstructured":"Kell AJM, Forshaw M, Stephen\u00a0McGough A (2020) Exploring market power using deep reinforcement learning for intelligent bidding strategies. In: 2020 IEEE international conference on big data (big data), pp 4402\u20134411","DOI":"10.1109\/BigData50022.2020.9378137"},{"key":"11439_CR46","doi-asserted-by":"crossref","first-page":"48059","DOI":"10.1109\/ACCESS.2023.3266331","volume":"11","author":"S Kim","year":"2023","unstructured":"Kim S (2023) Learning and game based spectrum allocation model for internet of medical things (IOMT) platform. IEEE Access 11:48059\u201348068","journal-title":"IEEE Access"},{"key":"11439_CR47","unstructured":"Kim D, Moon S, Hostallero D et\u00a0al (2019) Learning to schedule communication in multi-agent reinforcement learning. arXiv:1902.01554"},{"issue":"6","key":"11439_CR48","doi-asserted-by":"crossref","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2022","unstructured":"Kiran BR, Sobh I, Talpaert V et al (2022) Deep reinforcement learning for autonomous driving: a survey. IEEE Trans Intell Transpl Syst 23(6):4909\u20134926","journal-title":"IEEE Trans Intell Transpl Syst"},{"key":"11439_CR49","doi-asserted-by":"crossref","unstructured":"Kobren A, Monath N, Krishnamurthy A et\u00a0al (2017) A hierarchical algorithm for extreme clustering. In: Proceedings of the 23rd ACM SIGKDD international conference on knowledge discovery and data mining, pp 255\u2013264","DOI":"10.1145\/3097983.3098079"},{"issue":"Dec","key":"11439_CR50","first-page":"1107","volume":"4","author":"MG Lagoudakis","year":"2003","unstructured":"Lagoudakis MG, Parr R (2003) Least-squares policy iteration. J Mach Learn Res 4(Dec):1107\u20131149","journal-title":"J Mach Learn Res"},{"issue":"4","key":"11439_CR51","doi-asserted-by":"crossref","first-page":"373","DOI":"10.1093\/comjnl\/9.4.373","volume":"9","author":"GN Lance","year":"1967","unstructured":"Lance GN, Williams WT (1967) A general theory of classificatory sorting strategies: 1. Hierarchical systems. Comput J 9(4):373\u2013380","journal-title":"Comput J"},{"issue":"6","key":"11439_CR52","doi-asserted-by":"crossref","first-page":"1088","DOI":"10.1109\/TSMCA.2007.904745","volume":"37","author":"M Li","year":"2007","unstructured":"Li M, Zhou ZH (2007) Improve computer-aided diagnosis with machine learning techniques using undiagnosed samples. IEEE Trans Syst Man Cybern-Part A 37(6):1088\u20131098","journal-title":"IEEE Trans Syst Man Cybern-Part A"},{"issue":"1","key":"11439_CR53","doi-asserted-by":"crossref","first-page":"957","DOI":"10.1109\/TNSE.2023.3311047","volume":"11","author":"K Li","year":"2024","unstructured":"Li K, Hu Q, Liu Q et al (2024a) A predefined-time consensus algorithm of multi-agent system for distributed constrained optimization. 
IEEE Trans Netw Sci Eng 11(1):957\u2013968","journal-title":"IEEE Trans Netw Sci Eng"},{"issue":"8","key":"11439_CR54","doi-asserted-by":"crossref","first-page":"10035","DOI":"10.1109\/TITS.2024.3354196","volume":"25","author":"X Li","year":"2024","unstructured":"Li X, Yang Y, Yuan Z et al (2024b) Progression cognition reinforcement learning with prioritized experience for multi-vehicle pursuit. IEEE Trans Intell Transp Syst 25(8):10035\u201310048","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"11439_CR55","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A et\u00a0al (2015) Continuous control with deep reinforcement learning. arXiv:1509.02971"},{"issue":"3","key":"11439_CR56","doi-asserted-by":"crossref","first-page":"5574","DOI":"10.1109\/TNNLS.2024.3379207","volume":"36","author":"M Lin","year":"2025","unstructured":"Lin M, Zhao B, Liu D (2025) Optimal learning output tracking control: a model-free policy optimization method with convergence analysis. IEEE Trans Neural Netw Learn Syst 36(3):5574\u20135585","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11439_CR57","doi-asserted-by":"crossref","unstructured":"Littman ML (1994) Markov games as a framework for multi-agent reinforcement learning. In: Machine learning proceedings 1994. Elsevier, pp 157\u2013163","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"11439_CR58","doi-asserted-by":"crossref","unstructured":"Liu Y, Wang Q (2022) Game confrontation of 5v5 multi-agent based on Mappo reinforcement learning algorithm. In: 2022 37th youth academic annual conference of chinese association of automation (YAC), pp 1395\u20131398","DOI":"10.1109\/YAC57282.2022.10023568"},{"issue":"3","key":"11439_CR59","doi-asserted-by":"crossref","first-page":"621","DOI":"10.1109\/TNNLS.2013.2281663","volume":"25","author":"D Liu","year":"2013","unstructured":"Liu D, Wei Q (2013) Policy iteration adaptive dynamic programming algorithm for discrete-time nonlinear systems. IEEE Trans Neural Netw Learning Syst 25(3):621\u2013634","journal-title":"IEEE Trans Neural Netw Learning Syst"},{"issue":"3","key":"11439_CR60","doi-asserted-by":"crossref","first-page":"621","DOI":"10.1109\/TNNLS.2013.2281663","volume":"25","author":"D Liu","year":"2014","unstructured":"Liu D, Wei Q (2014) Policy iteration adaptive dynamic programming algorithm for discrete-time nonlinear systems. IEEE Trans Neural Netw Learn Syst 25(3):621\u2013634","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11439_CR61","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-50815-3","volume-title":"Adaptive dynamic programming with applications in optimal control","author":"D Liu","year":"2017","unstructured":"Liu D, Wei Q, Wang D et al (2017) Adaptive dynamic programming with applications in optimal control. Springer, Cham"},{"issue":"1","key":"11439_CR62","doi-asserted-by":"crossref","first-page":"142","DOI":"10.1109\/TSMC.2020.3042876","volume":"51","author":"D Liu","year":"2021","unstructured":"Liu D, Xue S, Zhao B et al (2021a) Adaptive dynamic programming for control: a survey and recent advances. IEEE Trans Syst Man Cybern Syst 51(1):142\u2013160","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"issue":"1","key":"11439_CR63","first-page":"857","volume":"35","author":"X Liu","year":"2021","unstructured":"Liu X, Zhang F, Hou Z et al (2021b) Self-supervised learning: generative or contrastive. 
IEEE Trans Knowl Data Eng 35(1):857\u2013876","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"4","key":"11439_CR64","doi-asserted-by":"crossref","first-page":"4372","DOI":"10.1109\/TVT.2022.3228198","volume":"72","author":"D Liu","year":"2023","unstructured":"Liu D, Dou L, Zhang R et al (2023) Multi-agent reinforcement learning-based coordinated dynamic task allocation for heterogenous UAVs. IEEE Trans Veh Technol 72(4):4372\u20134383","journal-title":"IEEE Trans Veh Technol"},{"issue":"5","key":"11439_CR65","doi-asserted-by":"crossref","first-page":"5527","DOI":"10.1109\/TMC.2023.3312220","volume":"23","author":"B Liu","year":"2024","unstructured":"Liu B, Han W, Wang E et al (2024) An efficient message dissemination scheme for cooperative drivings via cooperative hierarchical attention reinforcement learning. IEEE Trans Mob Comput 23(5):5527\u20135542","journal-title":"IEEE Trans Mob Comput"},{"issue":"2","key":"11439_CR66","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1109\/TIT.1982.1056489","volume":"28","author":"SP Lloyd","year":"1982","unstructured":"Lloyd SP (1982) Least squares quantization in PCM. IEEE Trans Inf Theory 28(2):129\u2013137","journal-title":"IEEE Trans Inf Theory"},{"issue":"10","key":"11439_CR67","doi-asserted-by":"crossref","first-page":"12461","DOI":"10.1109\/TII.2024.3424529","volume":"20","author":"R Lu","year":"2024","unstructured":"Lu R, Jiang Z, Yang T et al (2024) A novel hybrid-action-based deep reinforcement learning for industrial energy management. IEEE Trans Ind Inf 20(10):12461\u201312475","journal-title":"IEEE Trans Ind Inf"},{"issue":"10","key":"11439_CR68","doi-asserted-by":"crossref","first-page":"14209","DOI":"10.1109\/TITS.2024.3400224","volume":"25","author":"Z Ma","year":"2024","unstructured":"Ma Z, Liu X, Huang Y (2024) Unsupervised reinforcement learning for multi-task autonomous driving: expanding skills and cultivating curiosity. IEEE Trans Intell Transpl Syst 25(10):14209\u201314219","journal-title":"IEEE Trans Intell Transpl Syst"},{"key":"11439_CR69","doi-asserted-by":"crossref","unstructured":"Malathy V, Al-Jawahry HM, GKM et\u00a0al (2024) A reinforcement learning method in cooperative multi-agent system for production control system. In: 2024 international conference on data science and network security (ICDSNS), pp 1\u20134","DOI":"10.1109\/ICDSNS62112.2024.10691212"},{"issue":"6","key":"11439_CR70","doi-asserted-by":"crossref","first-page":"1393","DOI":"10.1109\/TSMCB.2009.2017273","volume":"39","author":"JR Marden","year":"2009","unstructured":"Marden JR, Arslan G, Shamma JS (2009) Cooperative control and potential games. IEEE Trans Syst Man Cybern B 39(6):1393\u20131407","journal-title":"IEEE Trans Syst Man Cybern B"},{"issue":"1","key":"11439_CR71","first-page":"135","volume":"15","author":"AA Markov","year":"1906","unstructured":"Markov AA (1906) Extension of the law of large numbers to dependent quantities. Izv Fiz-Matem Obsch Kazan Univ 15(1):135\u2013156","journal-title":"Izv Fiz-Matem Obsch Kazan Univ"},{"key":"11439_CR72","unstructured":"Ministry of Industry and Information Technology of the People\u2019s Republic of China (2024) Notice on issuing the national comprehensive standardization system construction guidelines for the artificial intelligence industry (2024 edition). 
https:\/\/www.miit.gov.cn\/zwgk\/zcwj\/wjfb\/tz\/art\/2024\/art_e8ebf5600ec24d3db644150873712c5f.html"},{"issue":"1","key":"11439_CR73","doi-asserted-by":"crossref","first-page":"8","DOI":"10.1109\/JRPROC.1961.287775","volume":"49","author":"M Minsky","year":"2007","unstructured":"Minsky M (2007) Steps toward artificial intelligence. Proc IRE 49(1):8\u201330","journal-title":"Proc IRE"},{"issue":"7540","key":"11439_CR74","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"11439_CR76","unstructured":"Mnih V, Kavukcuoglu K, Silver D et\u00a0al (2013) Playing Atari with deep reinforcement learning. arXiv:1312.5602"},{"key":"11439_CR75","unstructured":"Mnih V, Badia AP, Mirza M et\u00a0al (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, PmLR, pp 1928\u20131937"},{"issue":"4","key":"11439_CR77","doi-asserted-by":"crossref","first-page":"3698","DOI":"10.1109\/TSG.2018.2834219","volume":"10","author":"E Mocanu","year":"2019","unstructured":"Mocanu E, Mocanu DC, Nguyen PH et al (2019) On-line building energy optimization using deep reinforcement learning. IEEE Trans Smart Grid 10(4):3698\u20133708","journal-title":"IEEE Trans Smart Grid"},{"issue":"3","key":"11439_CR78","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1038\/s42256-019-0025-4","volume":"1","author":"EO Neftci","year":"2019","unstructured":"Neftci EO, Averbeck BB (2019) Reinforcement learning in artificial and biological systems. Nat Mach Intell 1(3):133\u2013143","journal-title":"Nat Mach Intell"},{"issue":"5","key":"11439_CR79","doi-asserted-by":"crossref","first-page":"2536","DOI":"10.1109\/JBHI.2023.3249571","volume":"27","author":"G Noaro","year":"2023","unstructured":"Noaro G, Zhu T, Cappon G et al (2023) A personalized and adaptive insulin bolus calculator based on double deep q-learning to improve type 1 diabetes management. IEEE J Biomed Health Inform 27(5):2536\u20132544","journal-title":"IEEE J Biomed Health Inform"},{"issue":"2","key":"11439_CR80","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1145\/568271.223813","volume":"24","author":"JS Park","year":"1995","unstructured":"Park JS, Chen MS, Yu PS (1995) An effective hash-based algorithm for mining association rules. ACM SIGMOD Rec 24(2):175\u2013186","journal-title":"ACM SIGMOD Rec"},{"issue":"11","key":"11439_CR81","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1080\/14786440109462720","volume":"2","author":"K Pearson","year":"1901","unstructured":"Pearson K (1901) Liii. On lines and planes of closest fit to systems of points in space. Lond Edinburgh Dublin Philos Mag J Sci 2(11):559\u2013572","journal-title":"Lond Edinburgh Dublin Philos Mag J Sci"},{"key":"11439_CR82","unstructured":"Peng P, Yuan Q, Wen Y et\u00a0al (2017) Multiagent bidirectionally-coordinated nets for learning to play Starcraft combat games. arXiv:1703.10069"},{"issue":"8","key":"11439_CR83","doi-asserted-by":"crossref","first-page":"10237","DOI":"10.1109\/TNNLS.2023.3250269","volume":"35","author":"RF Prudencio","year":"2023","unstructured":"Prudencio RF, Maximo MR, Colombini EL (2023) A survey on offline reinforcement learning: taxonomy, review, and open problems. 
IEEE Trans Neural Netw Learn Syst 35(8):10237\u201357","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11439_CR84","doi-asserted-by":"crossref","first-page":"1092","DOI":"10.1109\/ACCESS.2023.3345343","volume":"12","author":"L Qi","year":"2024","unstructured":"Qi L, Sun Y, Luan W (2024) Large-scale traffic signal control based on multi-agent q-learning and pressure. IEEE Access 12:1092\u20131101","journal-title":"IEEE Access"},{"issue":"1","key":"11439_CR85","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1109\/TSMC.2023.3302656","volume":"54","author":"C Qin","year":"2024","unstructured":"Qin C, Qiao X, Wang J et al (2024) Barrier-critic adaptive robust control of nonzero-sum differential games for uncertain nonlinear systems with state constraints. IEEE Trans Syst Man Cybern Syst 54(1):50\u201363","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"11439_CR86","volume":"161","author":"C Qin","year":"2025","unstructured":"Qin C, Hou S, Pang M et al (2025a) Reinforcement learning-based secure tracking control for nonlinear interconnected systems: an event-triggered solution approach. Eng Appl Artif Intell 161:112243","journal-title":"Eng Appl Artif Intell"},{"key":"11439_CR87","volume":"144","author":"C Qin","year":"2025","unstructured":"Qin C, Jiang K, Wang Y et al (2025b) Event-triggered H\u221e control for unknown constrained nonlinear systems with application to robot arm. Appl Math Model 144:116089","journal-title":"Appl Math Model"},{"key":"11439_CR88","volume":"183","author":"C Qin","year":"2025","unstructured":"Qin C, Ran X, Zhang D (2025c) Unsupervised image stitching based on generative adversarial networks and feature frequency awareness algorithm. Appl Soft Comput 183:113466","journal-title":"Appl Soft Comput"},{"issue":"3","key":"11439_CR89","doi-asserted-by":"crossref","first-page":"3448","DOI":"10.1109\/TNSM.2021.3087258","volume":"18","author":"G Qu","year":"2021","unstructured":"Qu G, Wu H, Li R et al (2021) DMRO: a deep meta reinforcement learning-based task offloading framework for edge-cloud computing. IEEE Trans Netw Serv Manage 18(3):3448\u20133459","journal-title":"IEEE Trans Netw Serv Manage"},{"key":"11439_CR91","unstructured":"Rashid T, Samvelyan M, Schroeder C et\u00a0al (2018) Qmix: monotonic value function factorisation for deep multi-agent reinforcement learning. In: International conference on machine learning, PMLR, pp 4295\u20134304"},{"key":"11439_CR90","unstructured":"Rashid T, Farquhar G, Peng B et\u00a0al (2020) Weighted qmix: expanding monotonic value function factorisation for deep multi-agent reinforcement learning. In: Advances in neural information processing systems, pp 10199\u201310210"},{"key":"11439_CR92","doi-asserted-by":"crossref","first-page":"48401","DOI":"10.1109\/ACCESS.2025.3551224","volume":"13","author":"S Rattal","year":"2025","unstructured":"Rattal S, Badri A, Moughit M et al (2025) Ai-driven optimization of low-energy IoT protocols for scalable and efficient smart healthcare systems. IEEE Access 13:48401\u201348415","journal-title":"IEEE Access"},{"key":"11439_CR93","doi-asserted-by":"crossref","unstructured":"Rawat RS, Rana DS (2023) Implementation of reinforcement learning and imaging for better decision-making in the medical sector. 
In: 2023 IEEE 8th international conference for convergence in technology (I2CT), pp 1\u20134","DOI":"10.1109\/I2CT57861.2023.10126243"},{"issue":"10","key":"11439_CR94","doi-asserted-by":"crossref","first-page":"12021","DOI":"10.1109\/TII.2024.3413356","volume":"20","author":"Y Ren","year":"2024","unstructured":"Ren Y, Zhang H, Du L et al (2024) Stealthy black-box attack with dynamic threshold against Marl-based traffic signal control system. IEEE Trans Ind Inf 20(10):12021\u201312031","journal-title":"IEEE Trans Ind Inf"},{"issue":"6","key":"11439_CR95","doi-asserted-by":"crossref","first-page":"386","DOI":"10.1037\/h0042519","volume":"65","author":"F Rosenblatt","year":"1958","unstructured":"Rosenblatt F (1958) The perceptron: a probabilistic model for information storage and organization in the brain. Psychol Rev 65(6):386\u2013408","journal-title":"Psychol Rev"},{"key":"11439_CR96","volume-title":"On-line Q-learning using connectionist systems","author":"GA Rummery","year":"1994","unstructured":"Rummery GA, Niranjan M (1994) On-line Q-learning using connectionist systems, vol 37. University of Cambridge, Department of Engineering, Cambridge"},{"key":"11439_CR97","doi-asserted-by":"crossref","unstructured":"Saeed AK, Holguin F, Yasin AS et\u00a0al (2024) Multi-agent and multi-target reinforcement learning for satellite sensor tasking. In: 2024 IEEE aerospace conference, pp 1\u201313","DOI":"10.1109\/AERO58975.2024.10521035"},{"issue":"3","key":"11439_CR98","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3068335","volume":"42","author":"E Schubert","year":"2017","unstructured":"Schubert E, Sander J, Ester M et al (2017) DBSCAN revisited, revisited: why and how you should (still) use DBSCAN. ACM Trans Database Syst (TODS) 42(3):1\u201321","journal-title":"ACM Trans Database Syst (TODS)"},{"issue":"1","key":"11439_CR99","doi-asserted-by":"crossref","first-page":"239","DOI":"10.1016\/j.ejor.2011.09.007","volume":"218","author":"HJ Sch\u00fctz","year":"2012","unstructured":"Sch\u00fctz HJ, Kolisch R (2012) Approximate dynamic programming for capacity allocation in the service industry. Eur J Oper Res 218(1):239\u2013250","journal-title":"Eur J Oper Res"},{"key":"11439_CR100","doi-asserted-by":"crossref","unstructured":"Schwung A, Schwung D, Abdul\u00a0Hameed MS (2019a) Cooperative robot control in flexible manufacturing cells: centralized vs. distributed approaches. In: 2019 IEEE 17th international conference on industrial informatics (INDIN), pp 233\u2013238","DOI":"10.1109\/INDIN41052.2019.8972060"},{"issue":"4","key":"11439_CR101","doi-asserted-by":"crossref","first-page":"360","DOI":"10.47839\/ijc.18.4.1607","volume":"18","author":"D Schwung","year":"2019","unstructured":"Schwung D, Schwung A, Ding SX (2019b) Actor-critic reinforcement learning for energy optimization in hybrid production environments. Int J Comput 18(4):360\u2013371","journal-title":"Int J Comput"},{"issue":"4","key":"11439_CR102","doi-asserted-by":"crossref","first-page":"2174","DOI":"10.1109\/TCYB.2020.3006620","volume":"52","author":"D Schwung","year":"2022","unstructured":"Schwung D, Schwung A, Ding SX (2022) Distributed self-optimization of modular production units: a state-based potential game approach. 
IEEE Trans Cybern 52(4):2174\u20132185","journal-title":"IEEE Trans Cybern"},{"issue":"4","key":"11439_CR103","doi-asserted-by":"crossref","first-page":"8432","DOI":"10.1109\/TTE.2024.3372945","volume":"10","author":"C Shang","year":"2024","unstructured":"Shang C, Fu L, Xiao H et al (2024) Joint optimization of power generation and voyage scheduling in ship power system based on operating scene clustering and multitask deep reinforcement learning. IEEE Trans Transp Electr 10(4):8432\u20138442","journal-title":"IEEE Trans Transp Electr"},{"key":"11439_CR104","doi-asserted-by":"crossref","unstructured":"Shen X, Zhang X, Wang Y (2021) Kernel temporal difference based reinforcement learning for brain machine interfaces. In: 2021 43rd annual international conference of the IEEE engineering in medicine & biology society (EMBC), pp 6721\u20136724","DOI":"10.1109\/EMBC46164.2021.9631086"},{"key":"11439_CR105","doi-asserted-by":"crossref","unstructured":"Shi B, Yuan H, Shi R (2018) Pricing cloud resource based on multi-agent reinforcement learning in the competing environment. In: 2018 IEEE Intl conf on parallel & distributed processing with applications, ubiquitous computing & communications, big data & Cloud computing, social computing & networking, sustainable computing & communications (ISPA\/IUCC\/BDCloud\/SocialCom\/SustainCom), pp 462\u2013468","DOI":"10.1109\/BDCloud.2018.00076"},{"key":"11439_CR106","unstructured":"Silver D, Lever G, Heess N et\u00a0al (2014) Deterministic policy gradient algorithms. In: International conference on machine learning, PMLR, pp 387\u2013395"},{"key":"11439_CR107","unstructured":"Singh A, Jain T, Sukhbaatar S (2019) Learning when to communicate at scale in multiagent cooperative and competitive tasks. In: 7th international conference on learning representations, ICLR 2019, New Orleans, LA, USA"},{"issue":"5","key":"11439_CR108","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1037\/h0047662","volume":"11","author":"BF Skinner","year":"1956","unstructured":"Skinner BF (1956) A case history in scientific method. Am Psychol 11(5):221\u2013233","journal-title":"Am Psychol"},{"issue":"3","key":"11439_CR109","doi-asserted-by":"crossref","first-page":"94","DOI":"10.1037\/h0049039","volume":"13","author":"BF Skinner","year":"1958","unstructured":"Skinner BF (1958) Reinforcement today. Am Psychol 13(3):94\u201399","journal-title":"Am Psychol"},{"key":"11439_CR110","unstructured":"Son K, Kim D, Kang WJ et\u00a0al (2019) QTRAN: learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: International conference on machine learning, PMLR, pp 5887\u20135896"},{"key":"11439_CR111","unstructured":"Son K, Kim D, Kang WJ et\u00a0al (2020) Qtran++: improved value transformation for cooperative multi-agent reinforcement learning. arXiv:2006.12010"},{"key":"11439_CR112","doi-asserted-by":"crossref","unstructured":"Sui F, Yue W, Zhang Z et\u00a0al (2023) Trial-and-error learning for mems structural design enabled by deep reinforcement learning. In: 2023 IEEE 36th international conference on micro electro mechanical systems (MEMS), pp 503\u2013506","DOI":"10.1109\/MEMS49605.2023.10052277"},{"key":"11439_CR113","unstructured":"Sukhbaatar S, Fergus R (2016) Learning multiagent communication with backpropagation. 
In: Proceedings of the 29th conference on neural information processing systems, NIPS, Barcelona, pp 2252\u20132260"},{"issue":"7","key":"11439_CR114","first-page":"1301","volume":"46","author":"C Sun","year":"2020","unstructured":"Sun C, Mu C (2020) Some key scientific problems in multi-agent deep reinforcement learning. Acta Automatica Sinica 46(7):1301\u20131312","journal-title":"Acta Automatica Sinica"},{"issue":"8","key":"11439_CR115","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s10462-024-10804-4","volume":"57","author":"Z Sun","year":"2024","unstructured":"Sun Z, Zhou Y, Tang S et al (2024) Noise suppression zeroing neural network for online solving the time-varying inverse kinematics problem of four-wheel mobile manipulators with external disturbances. Artif Intell Rev 57(8):211","journal-title":"Artif Intell Rev"},{"key":"11439_CR116","doi-asserted-by":"crossref","unstructured":"Sunehag P, Lever G, Gruslys A et\u00a0al (2017) Value-decomposition networks for cooperative multi-agent learning. arXiv:1706.05296","DOI":"10.65109\/JSRC7365"},{"key":"11439_CR117","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1023\/A:1022633531479","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3:9\u201344","journal-title":"Mach Learn"},{"issue":"5","key":"11439_CR118","doi-asserted-by":"crossref","first-page":"1054","DOI":"10.1109\/TNN.1998.712192","volume":"9","author":"R Sutton","year":"1998","unstructured":"Sutton R, Barto A (1998) Reinforcement learning: an introduction. IEEE Trans Neural Netw 9(5):1054\u20131054","journal-title":"IEEE Trans Neural Netw"},{"key":"11439_CR119","first-page":"1","volume":"12","author":"RS Sutton","year":"1999","unstructured":"Sutton RS, McAllester D, Singh S et al (1999) Policy gradient methods for reinforcement learning with function approximation. Adv Neural Inf Process Syst 12:1\u20137","journal-title":"Adv Neural Inf Process Syst"},{"key":"11439_CR120","unstructured":"Tamar A, Wu Y, Thomas G et\u00a0al (2016) Value iteration networks. Advances in neural information processing systems 29"},{"issue":"4","key":"11439_CR121","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0172395","volume":"12","author":"A Tampuu","year":"2017","unstructured":"Tampuu A, Matiisen T, Kodelja D et al (2017) Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12(4):e0172395","journal-title":"PLoS ONE"},{"key":"11439_CR122","doi-asserted-by":"crossref","unstructured":"Tan M (1993) Multi-agent reinforcement learning: independent vs. cooperative agents. In: Proceedings of the tenth international conference on machine learning, pp 330\u2013337","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"issue":"2","key":"11439_CR123","doi-asserted-by":"crossref","first-page":"1543","DOI":"10.1007\/s10462-022-10205-5","volume":"56","author":"V Uc-Cetina","year":"2023","unstructured":"Uc-Cetina V, Navarro-Guerrero N, Martin-Gonzalez A et al (2023) Survey on reinforcement learning for language processing. Artif Intell Rev 56(2):1543\u20131575","journal-title":"Artif Intell Rev"},{"key":"11439_CR124","first-page":"1","volume":"2009","author":"RJ Urbanowicz","year":"2009","unstructured":"Urbanowicz RJ, Moore JH (2009) Learning classifier systems: a complete introduction, review, and roadmap. 
J Artif Evol Appl 2009:1\u201325","journal-title":"J Artif Evol Appl"},{"issue":"11","key":"11439_CR125","first-page":"2579","volume":"9","author":"L Van der Maaten","year":"2008","unstructured":"Van der Maaten L, Hinton G (2008) Visualizing data using t-SNW. J Mach Learn Res 9(11):2579","journal-title":"J Mach Learn Res"},{"key":"11439_CR126","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v30i1.10295"},{"issue":"4","key":"11439_CR127","doi-asserted-by":"crossref","first-page":"390","DOI":"10.1109\/TAC.1965.1098193","volume":"10","author":"DL Waltz","year":"1965","unstructured":"Waltz DL, Fu KS (1965) A heuristic approach to reinforcement learning control systems. IEEE Trans Autom Control 10(4):390\u2013398","journal-title":"IEEE Trans Autom Control"},{"key":"11439_CR128","unstructured":"Wang Z, Schaul T, Hessel M et\u00a0al (2016) Dueling network architectures for deep reinforcement learning. arXiv:1511.06521"},{"key":"11439_CR129","unstructured":"Wang J, Ren Z, Liu T et\u00a0al (2020) Qplex: duplex dueling multi-agent q-learning. arXiv:2008.01062"},{"issue":"13","key":"11439_CR130","doi-asserted-by":"crossref","first-page":"4117","DOI":"10.1080\/00207543.2021.2020927","volume":"60","author":"H Wang","year":"2022","unstructured":"Wang H, Tao J, Peng T et al (2022a) Dynamic inventory replenishment strategy for aerospace manufacturing supply chain: combining reinforcement learning and multi-agent simulation. Int J Prod Res 60(13):4117\u20134136","journal-title":"Int J Prod Res"},{"key":"11439_CR131","unstructured":"Wang L, Zhang Y, Hu Y et\u00a0al (2022b) Individual reward assisted multi-agent reinforcement learning. In: Proceedings of the 39th international conference on machine learning, ACM Press, Baltimore, pp 23417\u201323432"},{"issue":"1","key":"11439_CR132","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/JAS.2023.123843","volume":"11","author":"D Wang","year":"2024","unstructured":"Wang D, Gao N, Liu D et al (2024a) Recent progress in reinforcement learning and adaptive dynamic programming for advanced control applications. IEEE\/CAA J Autom Sin 11(1):18\u201336","journal-title":"IEEE\/CAA J Autom Sin"},{"key":"11439_CR133","doi-asserted-by":"crossref","unstructured":"Wang H, Liu Z, Hu G et\u00a0al (2024b) Offline meta-reinforcement learning for active pantograph control in high-speed railways. IEEE Trans Ind Inform","DOI":"10.1109\/TII.2024.3394554"},{"issue":"11","key":"11439_CR134","doi-asserted-by":"crossref","first-page":"1738","DOI":"10.3390\/math13111738","volume":"13","author":"S Wang","year":"2025","unstructured":"Wang S, Yue Q, Xu Z et al (2025) A collaborative multi-agent reinforcement learning approach for non-stationary environments with unknown change points. Mathematics 13(11):1738","journal-title":"Mathematics"},{"key":"11439_CR135","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins CJ, Dayan P (1992) Q-learning. Mach Learn 8:279\u2013292","journal-title":"Mach Learn"},{"key":"11439_CR136","doi-asserted-by":"crossref","first-page":"65028","DOI":"10.1109\/ACCESS.2022.3181999","volume":"10","author":"S Wei","year":"2022","unstructured":"Wei S, Wang S, Sun S et al (2022) Stock ranking prediction based on an adversarial game neural network. 
IEEE Access 10:65028\u201365036","journal-title":"IEEE Access"},{"issue":"5","key":"11439_CR137","first-page":"1200","volume":"38","author":"G Wen","year":"2023","unstructured":"Wen G, Yang T, Zhou J et al (2023) Reinforcement learning and adaptive\/approximate dynamic programming: a survey from theory to applications in multi-agent systems. Kongzhi yu Juece\/Control Decision 38(5):1200\u20131230","journal-title":"Kongzhi yu Juece\/Control Decision"},{"issue":"9","key":"11439_CR138","doi-asserted-by":"crossref","first-page":"11648","DOI":"10.1109\/TITS.2024.3379508","volume":"25","author":"J Wu","year":"2024","unstructured":"Wu J, Li D, Yu Y et al (2024) An attention mechanism and adaptive accuracy triple-dependent MADDPG formation control method for hybrid UAVs. IEEE Trans Intell Transpl Syst 25(9):11648\u201311663","journal-title":"IEEE Trans Intell Transpl Syst"},{"key":"11439_CR139","doi-asserted-by":"crossref","first-page":"410","DOI":"10.1016\/j.jmsy.2023.05.005","volume":"68","author":"X Xia","year":"2023","unstructured":"Xia X, Fu X, Zhong S et al (2023) A multi-agent convolution deep reinforcement learning network for aeroengine fleet maintenance strategy optimization. J Manuf Syst 68:410\u2013425","journal-title":"J Manuf Syst"},{"issue":"9","key":"11439_CR140","doi-asserted-by":"crossref","first-page":"12484","DOI":"10.1109\/TVT.2024.3389555","volume":"73","author":"X Xing","year":"2024","unstructured":"Xing X, Zhou Z, Li Y et al (2024) Multi-UAV adaptive cooperative formation trajectory planning based on an improved MATD3 algorithm of deep reinforcement learning. IEEE Trans Veh Technol 73(9):12484\u201312499","journal-title":"IEEE Trans Veh Technol"},{"issue":"12","key":"11439_CR141","first-page":"2537","volume":"46","author":"L Xing-Xing","year":"2020","unstructured":"Xing-Xing L, Yang-He F, Yang M et al (2020) Deep multi-agent reinforcement learning: a survey. Acta Automatica Sinica 46(12):2537\u20132557","journal-title":"Acta Automatica Sinica"},{"issue":"12","key":"11439_CR142","doi-asserted-by":"crossref","first-page":"18307","DOI":"10.1109\/TVT.2024.3444475","volume":"73","author":"D Xu","year":"2024","unstructured":"Xu D, Yu Z, Liao X et al (2024) A graph deep reinforcement learning traffic signal control for multiple intersections considering missing data. IEEE Trans Veh Technol 73(12):18307\u201318319","journal-title":"IEEE Trans Veh Technol"},{"issue":"1","key":"11439_CR143","doi-asserted-by":"crossref","first-page":"2290","DOI":"10.1109\/TIV.2023.3298292","volume":"9","author":"Y Xue","year":"2023","unstructured":"Xue Y, Chen W (2023) Multi-agent deep reinforcement learning for UAVs navigation in unknown complex environment. IEEE Trans Intell Veh 9(1):2290\u20132303","journal-title":"IEEE Trans Intell Veh"},{"issue":"7","key":"11439_CR144","doi-asserted-by":"crossref","first-page":"2939","DOI":"10.1109\/TNNLS.2020.3009015","volume":"32","author":"S Xue","year":"2021","unstructured":"Xue S, Luo B, Liu D (2021) Event-triggered adaptive dynamic programming for unmatched uncertain nonlinear continuous-time systems. IEEE Trans Neural Netw Learn Syst 32(7):2939\u20132951","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"9","key":"11439_CR145","doi-asserted-by":"crossref","first-page":"9001","DOI":"10.1109\/TCYB.2021.3054626","volume":"52","author":"S Xue","year":"2022","unstructured":"Xue S, Luo B, Liu D et al (2022) Event-triggered ADP for tracking control of partially unknown constrained uncertain systems. 
IEEE Trans Cybern 52(9):9001\u20139012","journal-title":"IEEE Trans Cybern"},{"issue":"4","key":"11439_CR146","doi-asserted-by":"crossref","first-page":"1706","DOI":"10.1109\/TCYB.2025.3533139","volume":"55","author":"S Xue","year":"2025","unstructured":"Xue S, Zhang W, Luo B et al (2025a) Integral reinforcement learning-based dynamic event-triggered nonzero-sum games of USVs. IEEE Trans Cybern 55(4):1706\u20131716","journal-title":"IEEE Trans Cybern"},{"issue":"6","key":"11439_CR147","doi-asserted-by":"crossref","first-page":"9961","DOI":"10.1109\/TNNLS.2024.3512539","volume":"36","author":"S Xue","year":"2025","unstructured":"Xue S, Zhao N, Zhang W et al (2025b) A hybrid adaptive dynamic programming for optimal tracking control of USVs. IEEE Trans Neural Netw Learn Syst 36(6):9961\u20139969","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11439_CR148","doi-asserted-by":"crossref","unstructured":"Xu H, Zuo L, Sun F et\u00a0al (2022) Low-latency patient monitoring service for cloud computing based healthcare system by applying reinforcement learning. In: 2022 IEEE 8th international conference on computer and communications (ICCC), pp 1373\u20131377","DOI":"10.1109\/ICCC56324.2022.10065744"},{"issue":"2","key":"11439_CR149","doi-asserted-by":"crossref","first-page":"789","DOI":"10.1109\/TASE.2022.3168621","volume":"20","author":"Z Yan","year":"2023","unstructured":"Yan Z, Kreidieh AR, Vinitsky E et al (2023) Unified automatic control of vehicular systems with reinforcement learning. IEEE Trans Autom Sci Eng 20(2):789\u2013804","journal-title":"IEEE Trans Autom Sci Eng"},{"key":"11439_CR150","unstructured":"Yang Y, Hao J, Chen G et\u00a0al (2018) Multi-agent soft q-learning. arXiv:1804.04175"},{"key":"11439_CR154","unstructured":"Yang Y, Hao J, Chen G et\u00a0al (2020a) Q-value path decomposition for deep multi-agent reinforcement learning. arXiv:2002.03950"},{"key":"11439_CR151","unstructured":"Yang Y, Hao J, Liao B et\u00a0al (2020b) Qatten: a general framework for cooperative multi-agent reinforcement learning. arXiv:2002.03939"},{"issue":"9","key":"11439_CR152","doi-asserted-by":"crossref","first-page":"8934","DOI":"10.1109\/TKDE.2022.3220219","volume":"35","author":"X Yang","year":"2022","unstructured":"Yang X, Song Z, King I et al (2022a) A survey on deep semi-supervised learning. IEEE Trans Knowl Data Eng 35(9):8934\u20138954","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"11","key":"11439_CR153","doi-asserted-by":"crossref","first-page":"16288","DOI":"10.1109\/TNNLS.2023.3293508","volume":"35","author":"Q Yang","year":"2023","unstructured":"Yang Q, Wang S, Zhang Q et al (2023) Hundreds guide millions: adaptive offline reinforcement learning with expert guidance. IEEE Trans Neural Netw Learn Syst 35(11):16288\u201316300","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"2","key":"11439_CR155","doi-asserted-by":"crossref","first-page":"735","DOI":"10.1109\/TITS.2019.2893683","volume":"21","author":"C Yu","year":"2019","unstructured":"Yu C, Wang X, Xu X et al (2019) Distributed multiagent coordinated learning for autonomous driving in highways based on dynamic coordination graphs. 
IEEE Trans Intell Transp Syst 21(2):735\u2013748","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"2","key":"11439_CR156","doi-asserted-by":"crossref","first-page":"735","DOI":"10.1109\/TITS.2019.2893683","volume":"21","author":"C Yu","year":"2020","unstructured":"Yu C, Wang X, Xu X et al (2020) Distributed multiagent coordinated learning for autonomous driving in highways based on dynamic coordination graphs. IEEE Trans Intell Transp Syst 21(2):735\u2013748","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"11439_CR157","doi-asserted-by":"crossref","unstructured":"Yuan L, Wang J, Zhang F et\u00a0al (2022) Multi-agent incentive communication via decentralized teammate modeling. In: Proceedings of the 36th AAAI conference on artificial intelligence, AAAI Press, Ottawa, pp 9466\u20139474","DOI":"10.1609\/aaai.v36i9.21179"},{"key":"11439_CR158","unstructured":"Yu C, Velu A, Vinitsky E et\u00a0al (2022) The surprising effectiveness of PPO in cooperative multi-agent games. In: Advances in Neural Information Processing Systems, pp 24611\u201324624"},{"key":"11439_CR164","doi-asserted-by":"crossref","unstructured":"Zhang X, Wang Y (2023) A kernel reinforcement learning decoding framework integrating neural and feedback signals for brain control*. In: 2023 45th annual international conference of the IEEE engineering in medicine & biology society (EMBC), pp 1\u20134","DOI":"10.1109\/EMBC40787.2023.10340203"},{"issue":"6","key":"11439_CR159","doi-asserted-by":"crossref","first-page":"1612","DOI":"10.1109\/TSMCB.2011.2157998","volume":"41","author":"ML Zhang","year":"2011","unstructured":"Zhang ML, Zhou ZH (2011) Cotrade: confident co-training with data editing. IEEE Trans Syst Man Cybern B 41(6):1612\u20131626","journal-title":"IEEE Trans Syst Man Cybern B"},{"key":"11439_CR163","doi-asserted-by":"crossref","unstructured":"Zhang K, Yang Z, Ba\u015far T (2021) Multi-agent reinforcement learning: a selective overview of theories and algorithms. Handbook of reinforcement learning and control pp 321\u2013384","DOI":"10.1007\/978-3-030-60990-0_12"},{"issue":"1","key":"11439_CR160","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10489-023-05184-1","volume":"54","author":"D Zhang","year":"2023","unstructured":"Zhang D, Wang Y, Jiang K et al (2023) Safe optimal robust control of nonlinear systems with asymmetric input constraints using reinforcement learning. Appl Intell 54(1):1\u201313","journal-title":"Appl Intell"},{"key":"11439_CR161","doi-asserted-by":"crossref","first-page":"309","DOI":"10.1016\/j.isatra.2024.09.018","volume":"155","author":"D Zhang","year":"2024","unstructured":"Zhang D, Wang Y, Meng L et al (2024) Adaptive critic design for safety-optimal FTC of unknown nonlinear systems with asymmetric constrained-input. ISA Trans 155:309\u2013318","journal-title":"ISA Trans"},{"key":"11439_CR162","volume":"184","author":"D Zhang","year":"2025","unstructured":"Zhang D, Yu C, Li Z et al (2025) A lightweight network enhanced by attention-guided cross-scale interaction for underwater object detection. Appl Soft Comput 184:113811","journal-title":"Appl Soft Comput"},{"issue":"9","key":"11439_CR165","doi-asserted-by":"crossref","first-page":"5876","DOI":"10.1109\/TSMC.2025.3573738","volume":"55","author":"Y Zhang","year":"2025","unstructured":"Zhang Y, Zhao B, Liu D (2025) Distributed optimal containment control of wheeled mobile robots via adaptive dynamic programming. 
IEEE Trans Syst Man Cybern Syst 55(9):5876\u20135886","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"11439_CR166","doi-asserted-by":"crossref","unstructured":"Zhao F, Hua Y, Zheng H et\u00a0al (2023a) Cooperative target pursuit by multiple fixed-wing UAVs based on deep reinforcement learning and artificial potential field. In: 2023 42nd Chinese control conference (CCC), pp 5693\u20135698","DOI":"10.23919\/CCC58697.2023.10241187"},{"issue":"10","key":"11439_CR167","doi-asserted-by":"crossref","first-page":"12315","DOI":"10.1007\/s10462-023-10497-1","volume":"56","author":"M Zhao","year":"2023","unstructured":"Zhao M, Wang D, Qiao J et al (2023b) Advanced value iteration for discrete\u2013time intelligent critic control: a survey. Artif Intell Rev 56(10):12315\u201312346","journal-title":"Artif Intell Rev"},{"issue":"3","key":"11439_CR168","doi-asserted-by":"crossref","first-page":"4713","DOI":"10.1109\/TNNLS.2024.3362800","volume":"36","author":"B Zhao","year":"2025","unstructured":"Zhao B, Zhang S, Liu D (2025) Self-triggered approximate optimal neuro-control for nonlinear systems through adaptive dynamic programming. IEEE Trans Neural Netw Learn Syst 36(3):4713\u20134723","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11439_CR169","doi-asserted-by":"crossref","first-page":"471","DOI":"10.1007\/s40265-020-01435-4","volume":"81","author":"H Zheng","year":"2021","unstructured":"Zheng H, Ryzhov IO, Xie W et al (2021) Personalized multimorbidity management for patients with type 2 diabetes using reinforcement learning of electronic health records. Drugs 81:471\u2013482","journal-title":"Drugs"},{"key":"11439_CR171","doi-asserted-by":"crossref","unstructured":"Zhu H, Vyetrenko S, Dwarakanath K et\u00a0al (2023a) Once burned, twice shy? The effect of stock market bubbles on traders that learn by experience. In: 2023 Winter simulation conference (WSC), pp 291\u2013302","DOI":"10.1109\/WSC60868.2023.10408617"},{"issue":"11","key":"11439_CR170","doi-asserted-by":"crossref","first-page":"13344","DOI":"10.1109\/TPAMI.2023.3292075","volume":"45","author":"Z Zhu","year":"2023","unstructured":"Zhu Z, Lin K, Jain AK et al (2023b) Transfer learning in deep reinforcement learning: a survey. 
IEEE Trans Pattern Anal Mach Intell 45(11):13344\u201313362","journal-title":"IEEE Trans Pattern Anal Mach Intell"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11439-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-025-11439-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11439-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T05:45:22Z","timestamp":1771479922000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-025-11439-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,27]]},"references-count":171,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2026,2]]}},"alternative-id":["11439"],"URL":"https:\/\/doi.org\/10.1007\/s10462-025-11439-9","relation":{},"ISSN":["1573-7462"],"issn-type":[{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,27]]},"assertion":[{"value":"29 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"46"}}
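
The record above ends the JSON envelope. For anyone wanting to reproduce or post-process such a record, the following is a minimal sketch only: it assumes the record was obtained from the public Crossref REST API (api.crossref.org), and every field name it reads ("message", "title", "container-title", "author", "published", "reference-count", "is-referenced-by-count") is taken directly from the record itself; nothing else about the API is assumed.

```python
# Minimal sketch: fetch and read a Crossref work record like the one above.
# Assumes the public Crossref REST API endpoint; the DOI and all field names
# are copied from the record itself.
import json
import urllib.request

DOI = "10.1007/s10462-025-11439-9"
URL = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(URL) as resp:
    record = json.load(resp)

# Crossref wraps the work metadata in a "message" envelope,
# alongside "status" and "message-type".
msg = record["message"]

title = msg["title"][0]
journal = msg["container-title"][0]
authors = ["{given} {family}".format(**a) for a in msg["author"]]
published = msg["published"]["date-parts"][0]   # e.g. [2025, 11, 27]
n_refs = msg["reference-count"]                 # 171 in this record

print(title)
print(journal, "-", "-".join(str(p) for p in published))
print(", ".join(authors))
print(f"{n_refs} references; cited by {msg['is-referenced-by-count']} works")
```

Note that the per-reference entries sit in msg["reference"] as a list of dicts whose keys vary by deposit (some carry "DOI" and "doi-asserted-by", others only "unstructured"), so any downstream parser should treat those keys as optional.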