{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T04:18:14Z","timestamp":1743394694493,"version":"3.40.3"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T00:00:00Z","timestamp":1741824000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T00:00:00Z","timestamp":1741824000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Zhejiang Key R&D Program","award":["2021C03157"],"award-info":[{"award-number":["2021C03157"]}]},{"name":"start-up funding from Westlake Universitydation from Henan University of Technology","award":["041030150118"],"award-info":[{"award-number":["041030150118"]}]},{"name":"Scientific Research Funding Project of Westlake University","award":["2021WUFP017"],"award-info":[{"award-number":["2021WUFP017"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s40747-025-01788-x","type":"journal-article","created":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T08:18:13Z","timestamp":1741853893000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Target pursuit for multi-AUV system: zero-sum stochastic game with WoLF-PHC assisted"],"prefix":"10.1007","volume":"11","author":[{"given":"Le","family":"Hong","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1871-2658","authenticated-orcid":false,"given":"Weicheng","family":"Cui","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,13]]},"reference":[{"key":"1788_CR1","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1038\/s41586-020-03153-z","volume":"591","author":"G Li","year":"2021","unstructured":"Li G, Chen X, Zhou F, Liang Y, Xiao Y, Cao X et al (2021) Self-powered soft robot in the Mariana Trench. Nature 591:66\u201371. https:\/\/doi.org\/10.1038\/s41586-020-03153-z","journal-title":"Nature"},{"key":"1788_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1177\/17298806221091885","volume":"19","author":"G Wu","year":"2022","unstructured":"Wu G, Xu T, Sun Y, Zhang J (2022) Review of multiple unmanned surface vessels collaborative search and hunting based on swarm intelligence. Int J Adv Robot Syst 19:1\u201320. https:\/\/doi.org\/10.1177\/17298806221091885","journal-title":"Int J Adv Robot Syst"},{"key":"1788_CR3","doi-asserted-by":"publisher","unstructured":"Wang Y, Zhong F, Xu J, Wang Y (2022) ToM2C: target-oriented multi-agent communication and cooperation with theory of mind. In: Int Conf Learn Represent (ICLR). https:\/\/doi.org\/10.48550\/arXiv.2111.09189","DOI":"10.48550\/arXiv.2111.09189"},{"key":"1788_CR4","doi-asserted-by":"publisher","first-page":"841","DOI":"10.1016\/j.jfranklin.2022.11.037","volume":"360","author":"Q Zhang","year":"2023","unstructured":"Zhang Q, Song X, Song S, Stojanovic V (2023) Finite-Time sliding mode control for singularly perturbed PDE systems. J Franklin I 360:841\u2013861. https:\/\/doi.org\/10.1016\/j.jfranklin.2022.11.037","journal-title":"J Franklin I"},{"key":"1788_CR5","doi-asserted-by":"publisher","first-page":"1943","DOI":"10.1177\/01423312231225782","volume":"46","author":"Y Tao","year":"2024","unstructured":"Tao Y, Tao H, Zhuang Z, Stojanovic V, Paszke W (2024) Quantized iterative learning control of communication-constrained systems with encoding and decoding mechanism. Trans I Meas Control 46:1943\u20131954. https:\/\/doi.org\/10.1177\/01423312231225782","journal-title":"Trans I Meas Control"},{"key":"1788_CR6","doi-asserted-by":"publisher","first-page":"7451","DOI":"10.1007\/s40747-023-01135-y","volume":"9","author":"Z Peng","year":"2023","unstructured":"Peng Z, Song X, Song S, Stojanovic V (2023) Hysteresis quantified control for switched reaction\u2013diffusion systems and its application. Complex Intell Syst 9:7451\u20137460. https:\/\/doi.org\/10.1007\/s40747-023-01135-y","journal-title":"Complex Intell Syst"},{"key":"1788_CR7","doi-asserted-by":"publisher","first-page":"1347","DOI":"10.1109\/TAES.2014.130569","volume":"51","author":"W Lin","year":"2015","unstructured":"Lin W, Qu Z, Simaan MA (2015) Nash strategies for pursuit-evasion differential games involving limited observations. IEEE Trans Aero Elec Sys 51:1347\u20131356. https:\/\/doi.org\/10.1109\/TAES.2014.130569","journal-title":"IEEE Trans Aero Elec Sys"},{"key":"1788_CR8","doi-asserted-by":"publisher","first-page":"3971","DOI":"10.1109\/TAES.2023.3235873","volume":"59","author":"Z Zheng","year":"2023","unstructured":"Zheng Z, Zhang P, Yuan J (2023) Nonzero-sum pursuit-evasion game control for spacecraft systems: a Q-learning method. IEEE Trans Aero Elec Sys 59:3971\u20133981. https:\/\/doi.org\/10.1109\/TAES.2023.3235873","journal-title":"IEEE Trans Aero Elec Sys"},{"key":"1788_CR9","doi-asserted-by":"publisher","unstructured":"Talebi S, Simaan MA (2017) Multi-pursuer pursuit-evasion games under parameters uncertainty: a Monte Carlo approach. In: 2017 12th System of systems engineering conference (SoSE). IEEE, pp 1\u20136. https:\/\/doi.org\/10.1109\/SYSOSE.2017.7994937","DOI":"10.1109\/SYSOSE.2017.7994937"},{"key":"1788_CR10","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwac256","author":"X Deng","year":"2022","unstructured":"Deng X, Li N, Mguni D, Wang J, Yang Y (2022) On the complexity of computing Markov perfect equilibrium in general-sum stochastic games. Natl Sci Rev. https:\/\/doi.org\/10.1093\/nsr\/nwac256","journal-title":"Natl Sci Rev"},{"key":"1788_CR11","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1109\/TCDS.2020.2970487","volume":"12","author":"H Shi","year":"2020","unstructured":"Shi H, Zhai L, Wu H, Hwang M, Hwang KS, Hsu HP (2020) A multitier reinforcement learning model for a cooperative multiagent system. IEEE Trans Cogn Dev Syst 12:636\u2013644. https:\/\/doi.org\/10.1109\/TCDS.2020.2970487","journal-title":"IEEE Trans Cogn Dev Syst"},{"key":"1788_CR12","doi-asserted-by":"publisher","unstructured":"Shi D, Sauter MZ, Kralik JD (2009) Distributed, heterogeneous, multi-agent social coordination via reinforcement learning. In: 2009 IEEE international conference on robotics and biomimetics (ROBIO). IEEE, pp 653\u201358. https:\/\/doi.org\/10.1109\/ROBIO.2009.5420595","DOI":"10.1109\/ROBIO.2009.5420595"},{"key":"1788_CR13","doi-asserted-by":"publisher","unstructured":"Bowling M, Veloso M (2001) Convergence of gradient dynamics with a variable learning rate. In: Proceedings of the eighteenth international conference on machine learning (ICML), pp 27\u201334 https:\/\/doi.org\/10.5555\/645530.655659","DOI":"10.5555\/645530.655659"},{"key":"1788_CR14","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.enconman.2015.06.030","volume":"103","author":"L Xi","year":"2015","unstructured":"Xi L, Yu T, Yang B, Zhang X (2015) A novel multi-agent decentralized win or learn fast policy hill-climbing with eligibility trace algorithm for smart generation control of interconnected complex power grids. Energ Convers Manage 103:82\u201393. https:\/\/doi.org\/10.1016\/j.enconman.2015.06.030","journal-title":"Energ Convers Manage"},{"key":"1788_CR15","doi-asserted-by":"publisher","first-page":"692","DOI":"10.1109\/TCC.2021.3110965","volume":"11","author":"T Mai","year":"2021","unstructured":"Mai T, Yao H, Zhang N, Xu L, Guizani M, Guo S (2021) Cloud mining pool aided blockchain-enabled Internet of Things: an evolutionary game approach. IEEE Trans Cloud Comput 11:692\u2013703. https:\/\/doi.org\/10.1109\/TCC.2021.3110965","journal-title":"IEEE Trans Cloud Comput"},{"key":"1788_CR16","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1088\/0031-9112\/17\/2\/009","volume":"17","author":"D Wishart","year":"1966","unstructured":"Wishart D (1966) Differential games. A mathematical theory with applications to warfare and pursuit, control and optimization. Phys Bull 17:60. https:\/\/doi.org\/10.1088\/0031-9112\/17\/2\/009","journal-title":"Phys Bull"},{"key":"1788_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2021.12.025","volume":"475","author":"J Selvakumar","year":"2022","unstructured":"Selvakumar J, Bakolas E (2022) Min-Max Q-learning for multi-player pursuit-evasion games. Neurocomputing 475:1\u201314","journal-title":"Neurocomputing"},{"key":"1788_CR18","doi-asserted-by":"publisher","first-page":"7900","DOI":"10.1109\/TNNLS.2022.3146976","volume":"34","author":"R Zhang","year":"2022","unstructured":"Zhang R, Zong Q, Zhang X, Dou L, Tian B (2022) Game of drones: Multi-UAV pursuit-evasion game with online motion planning by deep reinforcement learning. IEEE Trans Neur Net Lear 34:7900\u20137909. https:\/\/doi.org\/10.1109\/TNNLS.2022.3146976","journal-title":"IEEE Trans Neur Net Lear"},{"key":"1788_CR19","doi-asserted-by":"publisher","DOI":"10.5555\/1942844","volume-title":"Game theory for wireless communications and networking","author":"Y Zhang","year":"2011","unstructured":"Zhang Y, Guizani M (2011) Game theory for wireless communications and networking. CRC Press, Boca Raton. https:\/\/doi.org\/10.5555\/1942844"},{"key":"1788_CR20","doi-asserted-by":"publisher","first-page":"696","DOI":"10.1109\/TCYB.2019.2914869","volume":"51","author":"J Selvakumar","year":"2019","unstructured":"Selvakumar J, Bakolas E (2019) Feedback strategies for a reach-avoid game with a single evader and multiple pursuers. IEEE Trans Cybern 51:696\u2013707. https:\/\/doi.org\/10.1109\/TCYB.2019.2914869","journal-title":"IEEE Trans Cybern"},{"key":"1788_CR21","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/j.neucom.2020.06.031","volume":"412","author":"Y Wang","year":"2020","unstructured":"Wang Y, Dong L, Sun C (2020) Cooperative control for multi-player pursuit-evasion games with reinforcement learning. Neurocomputing 412:101\u2013114. https:\/\/doi.org\/10.1016\/j.neucom.2020.06.031","journal-title":"Neurocomputing"},{"key":"1788_CR22","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1109\/TSMCB.2008.920998","volume":"38","author":"P Vrancx","year":"2008","unstructured":"Vrancx P, Verbeeck K, Now\u00e9 A (2008) Decentralized learning in Markov games. IEEE Trans Syst Man Cy B 38:976\u2013981. https:\/\/doi.org\/10.1109\/TSMCB.2008.920998","journal-title":"IEEE Trans Syst Man Cy B"},{"key":"1788_CR23","doi-asserted-by":"publisher","first-page":"769","DOI":"10.1109\/21.293490","volume":"24","author":"PS Sastry","year":"1994","unstructured":"Sastry PS, Phansalkar VV, Thathachar M (1994) Decentralized learning of Nash equilibria in multi-person stochastic games with incomplete information. IEEE Trans Syst Man Cy 24:769\u2013777. https:\/\/doi.org\/10.1109\/21.293490","journal-title":"IEEE Trans Syst Man Cy"},{"key":"1788_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/J.TRC.2021.103122","author":"W Du","year":"2021","unstructured":"Du W, Guo T, Chen J, Li B, Zhu G, Cao X (2021) Cooperative pursuit of unauthorized UAVs in urban airspace via Multi-agent reinforcement learning. Transp Res C-Emer. https:\/\/doi.org\/10.1016\/J.TRC.2021.103122","journal-title":"Transp Res C-Emer"},{"key":"1788_CR25","doi-asserted-by":"publisher","unstructured":"P\u00e9rolat J, Piot B, Geist M, Scherrer B, Pietquin O (2016) Softened approximate policy iteration for Markov games. In: Int Conf Mach Learn (ICML). PMLR, pp 1860\u20138 https:\/\/doi.org\/10.5555\/3045390.3045587","DOI":"10.5555\/3045390.3045587"},{"key":"1788_CR26","doi-asserted-by":"publisher","unstructured":"Sidford A, Wang M, Yang L, Ye Y (2020) Solving discounted stochastic two-player games with near-optimal time and sample complexity. In: Int Conf Artif Intell Stat (AISTATS). PMLR, pp 2992\u20133002. https:\/\/doi.org\/10.48550\/arXiv.1908.11071","DOI":"10.48550\/arXiv.1908.11071"},{"key":"1788_CR27","doi-asserted-by":"publisher","unstructured":"Littman ML (1994) Markov games as a framework for multi-agent reinforcement learning. In: Mach Learn Proc. Elsevier, pp 157\u201363 https:\/\/doi.org\/10.1016\/B978-1-55860-335-6.50027-1","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"1788_CR28","doi-asserted-by":"publisher","unstructured":"Hong L, Cui W (2023) Strategy determination for multiple USVs: a min-max Q-learning approach. In: Int Conf Neur Comput Adv Appl Springer, pp 403\u201317. https:\/\/doi.org\/10.1007\/978-981-99-5847-4_29","DOI":"10.1007\/978-981-99-5847-4_29"},{"key":"1788_CR29","doi-asserted-by":"publisher","unstructured":"Fan J, Wang Z, Xie Y, Yang Z (2020) A theoretical analysis of deep Q-learning. In: Learn Dynam Control (L4DC). PMLR, pp 486\u2013489. https:\/\/doi.org\/10.48550\/arXiv.1901.00137","DOI":"10.48550\/arXiv.1901.00137"},{"key":"1788_CR30","doi-asserted-by":"publisher","unstructured":"Bowling M, Veloso M (2001) Rational and convergent learning in stochastic games. In: Int Jt Conf Artif Iintell (IJCAI). Citeseer, pp 1021\u20131026. https:\/\/doi.org\/10.5555\/1642194.1642231","DOI":"10.5555\/1642194.1642231"},{"key":"1788_CR31","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1007\/s10994-006-0143-1","volume":"67","author":"V Conitzer","year":"2007","unstructured":"Conitzer V, Sandholm T (2007) AWESOME: a general multiagent learning algorithm that converges in self-play and learns a best response against stationary opponents. Mach Learn 67:23\u201343. https:\/\/doi.org\/10.1007\/s10994-006-0143-1","journal-title":"Mach Learn"},{"key":"1788_CR32","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1804.02884","author":"DT Nguyen","year":"2017","unstructured":"Nguyen DT, Kumar A, Lau HC (2017) Policy gradient with value function approximation for collective multiagent planning. Adv Neur Inform Process Syst (NIPS). https:\/\/doi.org\/10.48550\/arXiv.1804.02884","journal-title":"Adv Neur Inform Process Syst (NIPS)"},{"key":"1788_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2022.112742","author":"Z Sun","year":"2023","unstructured":"Sun Z, Sun H, Li P, Zou J (2023) Cooperative strategy for pursuit-evasion problem with collision avoidance. Ocean Eng. https:\/\/doi.org\/10.1016\/j.oceaneng.2022.112742","journal-title":"Ocean Eng"},{"key":"1788_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2023.114016","author":"X Qu","year":"2023","unstructured":"Qu X, Gan W, Song D, Zhou L (2023) Pursuit-evasion game strategy of USV based on deep reinforcement learning in complex multi-obstacle environment. Ocean Eng. https:\/\/doi.org\/10.1016\/j.oceaneng.2023.114016","journal-title":"Ocean Eng"},{"key":"1788_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/jmse12071221","volume":"12","author":"L Hong","year":"2024","unstructured":"Hong L, Cui W, Chen H, Song C, Li W (2024) Maneuver planning for multiple pursuit intelligent surface vehicles in a sequence of zero-sum pursuit-evasion games. J Mar Sci Eng 12:1\u201321. https:\/\/doi.org\/10.3390\/jmse12071221","journal-title":"J Mar Sci Eng"},{"key":"1788_CR36","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu L, Babuska R, De Schutter B (2008) A comprehensive survey of multiagent reinforcement learning. IEEE Trans Syst Man Cy C 38:156\u2013172. https:\/\/doi.org\/10.1109\/TSMCC.2007.913919","journal-title":"IEEE Trans Syst Man Cy C"},{"key":"1788_CR37","doi-asserted-by":"publisher","first-page":"1166","DOI":"10.48550\/arXiv.2007.07461","volume":"33","author":"K Zhang","year":"2020","unstructured":"Zhang K, Kakade S, Basar T, Yang L (2020) Model-based multi-agent rl in zero-sum Markov games with near-optimal sample complexity. Adv Neur Inform Process Syst (NIPS) 33:1166\u20131178. https:\/\/doi.org\/10.48550\/arXiv.2007.07461","journal-title":"Adv Neur Inform Process Syst (NIPS)"},{"key":"1788_CR38","doi-asserted-by":"publisher","first-page":"89","DOI":"10.32917\/hmj\/1206139508","volume":"28","author":"AM Fink","year":"1964","unstructured":"Fink AM (1964) Equilibrium in a stochastic n-person game. J Sci Hiroshima Univ Ser A-I Math 28:89\u201393. https:\/\/doi.org\/10.32917\/hmj\/1206139508","journal-title":"J Sci Hiroshima Univ Ser A-I Math"},{"key":"1788_CR39","doi-asserted-by":"publisher","first-page":"7256","DOI":"10.1109\/TII.2021.3055817","volume":"17","author":"X Gao","year":"2021","unstructured":"Gao X, Chan KW, Xia S, Zhang X, Zhang K, Zhou J (2021) A multiagent competitive bidding strategy in a pool-based electricity market with price-maker participants of WPPs and EV aggregators. IEEE Trans Ind Inform 17:7256\u20137268. https:\/\/doi.org\/10.1109\/TII.2021.3055817","journal-title":"IEEE Trans Ind Inform"},{"key":"1788_CR40","doi-asserted-by":"publisher","first-page":"18320","DOI":"10.48550\/arXiv.2106.02748","volume":"34","author":"M Sayin","year":"2021","unstructured":"Sayin M, Zhang K, Leslie D, Basar T, Ozdaglar A (2021) Decentralized Q-learning in zero-sum Markov games. Adv Neur Inform Process Syst (NIPS) 34:18320\u201318334. https:\/\/doi.org\/10.48550\/arXiv.2106.02748","journal-title":"Adv Neur Inform Process Syst (NIPS)"},{"key":"1788_CR41","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1613\/jair.4818","volume":"53","author":"D Bloembergen","year":"2015","unstructured":"Bloembergen D, Tuyls K, Hennes D, Kaisers M (2015) Evolutionary dynamics of multi-agent learning: a survey. J Artif Intell Res 53:659\u2013697. https:\/\/doi.org\/10.1613\/jair.4818","journal-title":"J Artif Intell Res"},{"key":"1788_CR42","doi-asserted-by":"publisher","unstructured":"Fiester C, Gomez-Ibanez D, Grund M, Purcell M, Jaffre F, Forrester N et al (2019) A modular, compact, and efficient next generation remus 600 auv. In: OCEANS 2019-Marseille. IEEE, pp 1\u20136 https:\/\/doi.org\/10.1109\/OCEANSE.2019.8867248","DOI":"10.1109\/OCEANSE.2019.8867248"},{"key":"1788_CR43","doi-asserted-by":"publisher","DOI":"10.3969\/j.issn.1000-1093.2020.08.022","author":"R Zheng","year":"2020","unstructured":"Zheng R, Xin C, Tang Z, Song T (2020) Review on the platform technology of autonomous deployment of AUV by USV. Acta Aliment Hung. https:\/\/doi.org\/10.3969\/j.issn.1000-1093.2020.08.022","journal-title":"Acta Aliment Hung"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01788-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-025-01788-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01788-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T21:28:12Z","timestamp":1743370092000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-025-01788-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,13]]},"references-count":43,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["1788"],"URL":"https:\/\/doi.org\/10.1007\/s40747-025-01788-x","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"type":"print","value":"2199-4536"},{"type":"electronic","value":"2198-6053"}],"subject":[],"published":{"date-parts":[[2025,3,13]]},"assertion":[{"value":"3 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"207"}}