{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T13:07:06Z","timestamp":1765976826104,"version":"3.44.0"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:00:00Z","timestamp":1743120000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:00:00Z","timestamp":1743120000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61573236"],"award-info":[{"award-number":["61573236"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s10489-025-06380-x","type":"journal-article","created":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T18:54:54Z","timestamp":1743360894000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A deep reinforcement learning approach and its application in multi-USV adversarial game simulation"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7998-2091","authenticated-orcid":false,"given":"Jinjun","family":"Rao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4575-6030","authenticated-orcid":false,"given":"Cong","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0078-7029","authenticated-orcid":false,"given":"Mei","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jinbo","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Jingtao","family":"Lei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1747-4010","authenticated-orcid":false,"given":"Wojciech","family":"Giernacki","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,28]]},"reference":[{"issue":"2","key":"6380_CR1","doi-asserted-by":"publisher","first-page":"61","DOI":"10.13788\/j.cnki.cbgc.2023.02.08","volume":"45","author":"Y Ping","year":"2023","unstructured":"Ping Y, Liu WB, Miao ZY et al (2023) Research status and development trend of intelligent unmanned surface vehicle. Shipengineering 45(2):61\u201369. https:\/\/doi.org\/10.13788\/j.cnki.cbgc.2023.02.08. (in Chinese)","journal-title":"Shipengineering"},{"key":"6380_CR2","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1016\/j.apor.2018.12.001","volume":"83","author":"R Song","year":"2019","unstructured":"Song R, Liu Y, Bucknall R (2019) Smoothed A* algorithm for practical unmanned surface vehicle path planning. Appl Ocean Res 83:9\u201320. https:\/\/doi.org\/10.1016\/j.apor.2018.12.001","journal-title":"Appl Ocean Res"},{"key":"6380_CR3","doi-asserted-by":"publisher","first-page":"112147","DOI":"10.1016\/j.oceaneng.2022.112147","volume":"261","author":"J Rao","year":"2022","unstructured":"Rao J, Xu X, Bian H et al (2022) A modified random network distillation algorithm and its application in USVs naval battle simulation. Ocean Eng 261:112147. https:\/\/doi.org\/10.1016\/j.oceaneng.2022.112147","journal-title":"Ocean Eng"},{"key":"6380_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.isatra.2023.01.007","author":"Y Wang","year":"2023","unstructured":"Wang Y, Liu W, Liu J, Sun C (2023) Cooperative USV-UAV marine search and rescue with visual navigation and reinforcement learning-based control. ISA Trans. https:\/\/doi.org\/10.1016\/j.isatra.2023.01.007","journal-title":"ISA Trans"},{"key":"6380_CR5","doi-asserted-by":"publisher","first-page":"105201","DOI":"10.1016\/j.knosys.2019.105201","volume":"196","author":"X Wu","year":"2020","unstructured":"Wu X, Chen H, Chen C et al (2020) The autonomous navigation and obstacle avoidance for USVs with ANOA deep reinforcement learning method. Knowledge-Based Systems 196:105201. https:\/\/doi.org\/10.1016\/j.knosys.2019.105201","journal-title":"Knowledge-Based Systems"},{"issue":"S1","key":"6380_CR6","doi-asserted-by":"publisher","first-page":"52","DOI":"10.3969\/j.issn.1000-4882.2020.z1.007.(inChinese)","volume":"61","author":"Y Zhuang","year":"2020","unstructured":"Zhuang Y, Li Y, Huang H, Wang X, Jiang L (2020) Deployment algorithm of unmanned surface vehicle swarm for field coverage. Shipbuild China 61(S1):52\u201359. https:\/\/doi.org\/10.3969\/j.issn.1000-4882.2020.z1.007.(inChinese)","journal-title":"Shipbuild China"},{"key":"6380_CR7","volume-title":"Deep Learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow I, Bengio Y, Courville A (2016) Deep Learning. MIT Press, Cambridge, USA"},{"key":"6380_CR8","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement Learning: An Introduction. A Bradford Book, Cambridge, USA"},{"issue":"6","key":"6380_CR9","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran K, Deisenroth MP, Brundage M, Bharath AA (2017) Deep reinforcement learning: a brief survey. IEEE Signal Process Mag 34(6):26\u201338. https:\/\/doi.org\/10.1109\/MSP.2017.2743240","journal-title":"IEEE Signal Process Mag"},{"key":"6380_CR10","doi-asserted-by":"publisher","unstructured":"Pathak D, Agrawal P, Efros AA, Darrell T (2017) Curiosity-Driven Exploration by Self-Supervised Prediction. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), Honolulu, HI, USA 488\u2013489. https:\/\/doi.org\/10.1109\/CVPRW.2017.70","DOI":"10.1109\/CVPRW.2017.70"},{"key":"6380_CR11","doi-asserted-by":"publisher","unstructured":"Cohen A, Teng E, Berges V-P, Dong R-P, Henry H, Mattar M, Zook A, Ganguly S (2021) On the use and misuse of absorbing states in multi-agent reinforcement learning. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.2111.05992","DOI":"10.48550\/arXiv.2111.05992"},{"key":"6380_CR12","doi-asserted-by":"publisher","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1707.06347","DOI":"10.48550\/arXiv.1707.06347"},{"issue":"7540","key":"6380_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"issue":"7587","key":"6380_CR14","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ et al (2016) Mastering the game of Go with deep neural networks and tree search. Nature 529(7587):484\u2013489. https:\/\/doi.org\/10.1038\/nature16961","journal-title":"Nature"},{"issue":"7676","key":"6380_CR15","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K et al (2017) Mastering the game of go without human knowledge. Nature 550(7676):354\u2013359. https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature"},{"issue":"3-4","key":"6380_CR16","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins CJCH, Dayan P (1992) Technical Note: Q-Learning. Mach Learn 8(3\u20134):279\u2013292. https:\/\/doi.org\/10.1007\/BF00992698","journal-title":"Mach Learn"},{"key":"6380_CR17","doi-asserted-by":"publisher","unstructured":"Sutton RS, McAllester D, Singh S, Mansour Y (1999) Policy gradient methods for reinforcement learning with function approximation. In: Proceedings of the 12th International Conference on Neural Information Processing Systems (NIPS), MIT Press, Cambridge, USA, 1057\u20131063. https:\/\/doi.org\/10.5555\/3009657.3009806","DOI":"10.5555\/3009657.3009806"},{"key":"6380_CR18","doi-asserted-by":"publisher","first-page":"e0172395","DOI":"10.1371\/journal.pone.0172395","volume":"12","author":"A Tampuu","year":"2017","unstructured":"Tampuu A, Matiisen T, Kodelja D, Kuzovkin I, Korjus K, Aru J, Aru J, Vicente R (2017) Multiagent cooperation and competition with deep reinforcement learning. Plos One 12:e0172395. https:\/\/doi.org\/10.1371\/journal.pone.0172395","journal-title":"Plos One"},{"key":"6380_CR19","doi-asserted-by":"publisher","unstructured":"Leibo JZ, Zambaldi V, Lanctot M, Marecki J, Graepel T (2017) Multi-agent reinforcement learning in sequential social dilemmas, in: Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems (AAMAS). International Foundation for Autonomous Agents and Multiagent Systems, Richland, SC, 464\u2013473. https:\/\/doi.org\/10.5555\/3091125.3091194","DOI":"10.5555\/3091125.3091194"},{"key":"6380_CR20","doi-asserted-by":"publisher","unstructured":"Foerster JN, Assael YM, Freitas N, Whiteson S (2016) Learning to communicate with deep multi-agent reinforcement learning. In: Proceedings of the 30th International Conference on Neural Information Processing Systems (NIPS). Curran Associates Inc., Red Hook, NY, USA, 2145\u20132153. https:\/\/doi.org\/10.5555\/3157096.3157336","DOI":"10.5555\/3157096.3157336"},{"key":"6380_CR21","doi-asserted-by":"publisher","unstructured":"Diallo EAO, Sugiyama A, Sugawara T (2017) Learning to coordinate with deep reinforcement learning in doubles pong game. In: 2017 16th IEEE International Conference on Machine Learning and Applications (ICMLA), Cancun, Mexico. 14\u201319. https:\/\/doi.org\/10.1109\/ICMLA.2017.0-184","DOI":"10.1109\/ICMLA.2017.0-184"},{"key":"6380_CR22","doi-asserted-by":"publisher","unstructured":"Bansal T, Pachocki J, Sidor S, Sutskever I, Mordatch I (2017) Emergent complexity via multi-agent competition. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1710.03748","DOI":"10.48550\/arXiv.1710.03748"},{"issue":"2-3","key":"6380_CR23","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1016\/0004-3702(92)90058-6","volume":"55","author":"S Mahadevan","year":"1992","unstructured":"Mahadevan S, Connell J (1992) Automatic programming of behavior-based robots using reinforcement learning. Artif Intell 55(2\u20133):311\u2013365. https:\/\/doi.org\/10.1016\/0004-3702(92)90058-6","journal-title":"Artif Intell"},{"key":"6380_CR24","doi-asserted-by":"publisher","unstructured":"Ng AY, Harada D, Russell S (1999) Policy invariance under reward transformations: theory and application to reward shaping. In: Proceedings of the Sixteenth International Conference on Machine Learning (ICML), Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 278\u2013287. https:\/\/doi.org\/10.5555\/645528.657613","DOI":"10.5555\/645528.657613"},{"issue":"7782","key":"6380_CR25","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM et al (2019) Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575(7782):350\u2013354. https:\/\/doi.org\/10.1038\/s41586-019-1724-z","journal-title":"Nature"},{"key":"6380_CR26","doi-asserted-by":"publisher","unstructured":"Vinyals O, Ewalds T, Bartunov S et al. (2017) StarCraft II: A new challenge for reinforcement learning. arXiv preprint. https:\/\/doi.org\/10.48550\/arXiv.1708.04782","DOI":"10.48550\/arXiv.1708.04782"},{"key":"6380_CR27","doi-asserted-by":"publisher","unstructured":"Raiman J, Zhang S, Wolski F (2019) Long-Term Planning and Situational Awareness in OpenAI Five. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1912.06721","DOI":"10.48550\/arXiv.1912.06721"},{"issue":"6443","key":"6380_CR28","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1126\/science.aau6249","volume":"364","author":"M Jaderberg","year":"2019","unstructured":"Jaderberg M, Czarnecki WM, Dunning I et al (2019) Human-level performance in 3D multiplayer games with population-based reinforcement learning. Science 364(6443):859\u2013865. https:\/\/doi.org\/10.1126\/science.aau6249","journal-title":"Science"},{"key":"6380_CR29","doi-asserted-by":"publisher","unstructured":"Baker B, Kanitscheider I, Markov T et al. (2019) Emergent tool use from multi-agent autocurricula. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1909.07528","DOI":"10.48550\/arXiv.1909.07528"},{"issue":"6","key":"6380_CR30","doi-asserted-by":"publisher","first-page":"1526","DOI":"10.1007\/s11263-022-01611-x","volume":"130","author":"P Soviany","year":"2022","unstructured":"Soviany P, Ionescu RT, Rota P, Sebe N (2022) Curriculum Learning: A Survey. Int J Comput Vision 130(6):1526\u20131565. https:\/\/doi.org\/10.1007\/s11263-022-01611-x","journal-title":"Int J Comput Vision"},{"key":"6380_CR31","doi-asserted-by":"publisher","unstructured":"Ciosek K (2021) Imitation learning by reinforcement learning. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.2108.04763","DOI":"10.48550\/arXiv.2108.04763"},{"issue":"3","key":"6380_CR32","doi-asserted-by":"publisher","first-page":"1243","DOI":"10.1109\/TNNLS.2021.3105140","volume":"34","author":"H Xu","year":"2023","unstructured":"Xu H, Szymanski L, McCane B (2023) VASE: Variational Assorted Surprise Exploration for reinforcement learning. IEEE Trans Neural Netw Learn Syst 34(3):1243\u20131252. https:\/\/doi.org\/10.1109\/TNNLS.2021.3105140","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"03","key":"6380_CR33","doi-asserted-by":"publisher","first-page":"223","DOI":"10.16451\/j.cnki.issn1003-6059.202103004","volume":"34","author":"B Fang","year":"2021","unstructured":"Fang B, Ma Y, Wang Z, Wang H (2021) Emotion-based heterogeneous multi-agent reinforcement learning with sparse reward. Pattern Recogn Artif Intell 34(03):223\u2013231. https:\/\/doi.org\/10.16451\/j.cnki.issn1003-6059.202103004. (in Chinese)","journal-title":"Pattern Recogn Artif Intell"},{"issue":"7","key":"6380_CR34","doi-asserted-by":"publisher","first-page":"6180","DOI":"10.1109\/JIOT.2020.2973193","volume":"7","author":"C Wang","year":"2020","unstructured":"Wang C, Wang J, Wang J, Zhang X (2020) Deep-reinforcement-learning-based autonomous UAV navigation with sparse rewards. IEEE Internet Things J 7(7):6180\u20136190. https:\/\/doi.org\/10.1109\/JIOT.2020.2973193","journal-title":"IEEE Internet Things J"},{"issue":"3","key":"6380_CR35","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and TD-Gammon. Commun ACM 38(3):58\u201368. https:\/\/doi.org\/10.1145\/203330.203343","journal-title":"Commun ACM"},{"key":"6380_CR36","doi-asserted-by":"publisher","unstructured":"Heinrich J, Lanctot M, Silver D (2015) Fictitious self-play in extensive-form games. In: Proceedings of the 32nd International Conference on Machine Learning. PMLR 37:805\u2013813. https:\/\/doi.org\/10.5555\/3045118.3045205","DOI":"10.5555\/3045118.3045205"},{"key":"6380_CR37","doi-asserted-by":"publisher","unstructured":"Sukhbaatar S, Lin Z, Kostrikov I, Synnaeve G, Szlam A, Fergus R (2017) Intrinsic motivation and automatic curricula via asymmetric Self-Play. arXiv preprint. https:\/\/doi.org\/10.48550\/arXiv.1703.05407","DOI":"10.48550\/arXiv.1703.05407"},{"key":"6380_CR38","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1016\/j.neucom.2021.04.006","volume":"449","author":"S Liu","year":"2021","unstructured":"Liu S, Cao J, Wang Y, Chen W, Liu Y (2021) Self-play reinforcement learning with comprehensive critic in computer games. Neurocomputing 449:207\u2013213. https:\/\/doi.org\/10.1016\/j.neucom.2021.04.006","journal-title":"Neurocomputing"},{"issue":"4","key":"6380_CR39","doi-asserted-by":"publisher","first-page":"2603","DOI":"10.1109\/TRO.2023.3257541","volume":"39","author":"Y Gao","year":"2023","unstructured":"Gao Y, Chen J, Chen X, Wang C, Hu J, Deng F, Lam TL (2023) Asymmetric Self-Play-enabled intelligent heterogeneous multirobot catching system using deep multiagent reinforcement learning. IEEE Trans Rob 39(4):2603\u20132622. https:\/\/doi.org\/10.1109\/TRO.2023.3257541","journal-title":"IEEE Trans Rob"},{"key":"6380_CR40","doi-asserted-by":"publisher","unstructured":"Sutton RS (n.d.) Temporal credit assignment in reinforcement learning. Ph.D. Dissertation, Universityof Massachusetts Amherst, Order Number: AAI8410337. https:\/\/doi.org\/10.5555\/911176","DOI":"10.5555\/911176"},{"key":"6380_CR41","doi-asserted-by":"publisher","first-page":"108466","DOI":"10.1016\/j.patcog.2021.108466","volume":"124","author":"D Yan","year":"2022","unstructured":"Yan D, Weng J, Huang S, Li C, Zhou Y, Su H, Zhu J (2022) Deep reinforcement learning with credit assignment for combinatorial optimization. Pattern Recognition 124:108466. https:\/\/doi.org\/10.1016\/j.patcog.2021.108466","journal-title":"Pattern Recognition"},{"key":"6380_CR42","unstructured":"Mesnard T, Weber T, Viola F et al. (2021) Counterfactual Credit Assignment in Model-Free Reinforcement Learning. In: Proceedings of the 38th International Conference on Machine Learning, PMLR 139:7654\u20137664. http:\/\/proceedings.mlr.press\/v139\/mesnard21a\/mesnard21a.pdf"},{"key":"6380_CR43","doi-asserted-by":"publisher","unstructured":"Liu Y, Luo Y, Zhong Y, Chen X, Liu Q, Peng J (n.d.) Sequence modeling of temporal credit assignment for episodic reinforcement learning, arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1905.13420","DOI":"10.48550\/arXiv.1905.13420"},{"key":"6380_CR44","doi-asserted-by":"publisher","first-page":"110709","DOI":"10.1016\/j.knosys.2023.110709","volume":"275","author":"S Wang","year":"2023","unstructured":"Wang S, Chen W, Huang L, Zhang F, Zhao Z, Qu H (2023) Regularization-Adapted Anderson Acceleration for multi-agent reinforcement learning. Knowledge-Based Syst 275:110709. https:\/\/doi.org\/10.1016\/j.knosys.2023.110709","journal-title":"Knowledge-Based Syst"},{"key":"6380_CR45","doi-asserted-by":"publisher","unstructured":"Leroy P, Pisane J, Ernst D (2022) Value-based CTDE methods in symmetric two-team Markov Game: from cooperation to team competition. arXiv preprint. https:\/\/doi.org\/10.48550\/arXiv.2211.11886","DOI":"10.48550\/arXiv.2211.11886"},{"key":"6380_CR46","doi-asserted-by":"publisher","unstructured":"Lyu X, Xiao Y, Daley B, Amato C (2021) Contrasting centralized and decentralized critics in multi-agent reinforcement learning, in: Proceedings of the 20th International Conference on Autonomous Agents and MultiAgent Systems (AAMAS). International Foundation for Autonomous Agents and Multiagent Systems, Richland, SC, 844\u2013852. https:\/\/doi.org\/10.5555\/3463952.3464053","DOI":"10.5555\/3463952.3464053"},{"key":"6380_CR47","doi-asserted-by":"publisher","unstructured":"Vaswani A, Shazeer N, Parmar N et al. (2017) Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems (NIPS). Curran Associates Inc., Red Hook, NY, USA 6000\u20136010. https:\/\/doi.org\/10.5555\/3295222.3295349","DOI":"10.5555\/3295222.3295349"},{"key":"6380_CR48","doi-asserted-by":"publisher","unstructured":"Foerster J, Farquhar G, Afouras T, Nardelli N, Whiteson S (2018) Counterfactual multi-agent policy gradients. In: Proceedings of the AAAI Conference on Artificial Intelligence. 32(1). https:\/\/doi.org\/10.1609\/aaai.v32i1.11794","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"6380_CR49","doi-asserted-by":"publisher","unstructured":"Oliehoek FA, Amato C (2016) A Concise Introduction to Decentralized POMDPs, first ed., Vol. 1. Cham, Switzerland: Springer International Publishing. https:\/\/doi.org\/10.1007\/978-3-319-28929-8","DOI":"10.1007\/978-3-319-28929-8"},{"key":"6380_CR50","doi-asserted-by":"publisher","unstructured":"Schulman J, Levine S, Moritz P, Jordan M, Abbeel P (2015) Trust region policy optimization. In: Proceedings of the 32nd International Conference on Machine Learning. PMLR 37:1889-1897. https:\/\/doi.org\/10.5555\/3045118.3045319","DOI":"10.5555\/3045118.3045319"},{"issue":"1","key":"6380_CR51","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1023\/A:1022633531479","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3(1):9\u201344. https:\/\/doi.org\/10.1023\/A:1022633531479","journal-title":"Mach Learn"},{"issue":"2","key":"6380_CR52","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1016\/j.automatica.2004.10.006","volume":"41","author":"R Skjetne","year":"2005","unstructured":"Skjetne R, Fossen TI, Kokotovi\u0107 PV (2005) Adaptive maneuvering, with experiments, for a model ship in a marine control laboratory. Automatica 41(2):289\u2013298. https:\/\/doi.org\/10.1016\/j.automatica.2004.10.006","journal-title":"Automatica"},{"key":"6380_CR53","doi-asserted-by":"publisher","DOI":"10.1002\/9781119575016","volume-title":"Handbook of marine craft hydrodynamics and motion control","author":"TI Fossen","year":"2021","unstructured":"Fossen TI (2021) Handbook of marine craft hydrodynamics and motion control, 2nd edn. Wiley, England","edition":"2"},{"key":"6380_CR54","unstructured":"Qin Z (2019) The study on motion control and swarm coordinated planning for Unmanned Surface Vehicles (USV), Harbin Engineering University. PhD dissertation. in Chinese"},{"key":"6380_CR55","doi-asserted-by":"publisher","unstructured":"Juliani A, Berges V-P, Teng E et al. (2018) Unity: A general platform for intelligent agents. arXiv preprint https:\/\/doi.org\/10.48550\/arXiv.1809.02627","DOI":"10.48550\/arXiv.1809.02627"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06380-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-06380-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06380-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T19:38:20Z","timestamp":1758310700000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-06380-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,28]]},"references-count":55,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["6380"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-06380-x","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2025,3,28]]},"assertion":[{"value":"14 February 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 March 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Ethical approval was not needed for this research.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval and informed consent"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"591"}}