{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T03:24:08Z","timestamp":1781925848716,"version":"3.54.5"},"reference-count":109,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Basic Science Research Program through the National Research Foundation of Korea (NRF) funded by the Ministry of Education","award":["NRF-2020R1A6A1A03038540"],"award-info":[{"award-number":["NRF-2020R1A6A1A03038540"]}]},{"DOI":"10.13039\/501100014188","name":"Korean Government through the Ministry of Science and ICT (MSIT), South Korea","doi-asserted-by":"publisher","award":["NRF-2023R1A2C1002656"],"award-info":[{"award-number":["NRF-2023R1A2C1002656"]}],"id":[{"id":"10.13039\/501100014188","id-type":"DOI","asserted-by":"publisher"}]},{"name":"ICT Challenge and Advanced Network of HRD Program through MSIT","award":["IITP-2025-RS-2022-00156345"],"award-info":[{"award-number":["IITP-2025-RS-2022-00156345"]}]},{"name":"Institute of Information and Communications Technology Planning and Evaluation (IITP)-Information Technology Research Center (ITRC) grant funded by the Korean Government","award":["IITP-2025-RS-2024-00437494"],"award-info":[{"award-number":["IITP-2025-RS-2024-00437494"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3609457","type":"journal-article","created":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T17:31:22Z","timestamp":1757698282000},"page":"161505-161528","source":"Crossref","is-referenced-by-count":5,"title":["A Survey of Multi-Agent Reinforcement Learning for Cooperative Control in Multi-AUV Systems"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4635-6400","authenticated-orcid":false,"given":"Arif","family":"Wibisono","sequence":"first","affiliation":[{"name":"Department of Intelligent Mechatronics Engineering, Sejong University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3274-4982","authenticated-orcid":false,"given":"Hyoung-Kyu","family":"Song","sequence":"additional","affiliation":[{"name":"Department of Convergence Engineering for Intelligent Drone, Sejong University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3675-929X","authenticated-orcid":false,"given":"Byung Moo","family":"Lee","sequence":"additional","affiliation":[{"name":"Department of Convergence Engineering for Intelligent Drone, Sejong University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3521889"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2025.127256"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/jmse10070920"},{"key":"ref4","article-title":"Scaling multi agent reinforcement learning for underwater acoustic tracking via autonomous vehicles","author":"Gallici","year":"2025","journal-title":"arXiv:2505.08222"},{"key":"ref5","article-title":"Is Fisher all you need in the multi-AUV underwater target tracking task?","author":"Xu","year":"2024","journal-title":"arXiv:2412.03959"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IECON51785.2023.10312315"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3390\/s18113859"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.jfranklin.2023.01.020"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.3390\/app10041256"},{"key":"ref10","article-title":"State-of-the-art underwater vehicles and technologies enabling smart ocean: Survey and classifications","author":"Xu","year":"2024","journal-title":"arXiv:2412.18667"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.3390\/jmse11101863"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2023.113861"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2004.1307121"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/8154573"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2993012"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2922060"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2021.3089476"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2024.117634"},{"key":"ref19","article-title":"An introduction to centralized training for decentralized execution in cooperative multi-agent reinforcement learning","author":"Amato","year":"2024","journal-title":"arXiv:2409.03052"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.jai.2024.02.003"},{"key":"ref22","article-title":"Multi-agent reinforcement learning: A comprehensive survey","author":"Huh","year":"2023","journal-title":"arXiv:2312.10256"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref24","article-title":"Game-theoretic multiagent reinforcement learning","author":"Yang","year":"2020","journal-title":"arXiv:2011.00583"},{"key":"ref25","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"Lowe","year":"2017","journal-title":"arXiv:1706.02275"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.3390\/electronics14040820"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10299-x"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09938-y"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.3390\/app11114948"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-019-09421-1"},{"key":"ref31","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"Sunehag","year":"2017","journal-title":"arXiv:1706.05296"},{"key":"ref32","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"Rashid","year":"2018","journal-title":"arXiv:1803.11485"},{"key":"ref33","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn. (ICML)","author":"Son"},{"key":"ref34","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","author":"Wang","year":"2020","journal-title":"arXiv:2008.01062"},{"key":"ref35","first-page":"5571","article-title":"Mean field multi-agent reinforcement learning","volume-title":"Proc. 35th Int. Conf. Mach. Learn. (ICML)","author":"Yang"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2103.01955"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref38","first-page":"12208","article-title":"FACMAC: Factored multi-agent centralised policy gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Peng"},{"key":"ref39","first-page":"13458","article-title":"Settling the variance of multi-agent policy gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Kuba"},{"key":"ref40","article-title":"Order matters: Agent-by-agent policy optimization","author":"Wang","year":"2023","journal-title":"arXiv:2302.06205"},{"key":"ref41","first-page":"4190","article-title":"A unified game-theoretic approach to multiagent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"30","author":"Lanctot"},{"key":"ref42","first-page":"2252","article-title":"Learning multiagent communication with backpropagation","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"29","author":"Sukhbaatar"},{"key":"ref43","first-page":"1","article-title":"Learning to communicate with deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Foerster"},{"key":"ref44","article-title":"ROMA: Multi-agent reinforcement learning with emergent roles","author":"Wang","year":"2020","journal-title":"arXiv:2003.08039"},{"key":"ref45","first-page":"1804","article-title":"Opponent modeling in deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Mach. Learn. (ICML)","author":"He"},{"key":"ref46","article-title":"Learning with opponent-learning awareness","author":"Foerster","year":"2017","journal-title":"arXiv:1709.04326"},{"key":"ref47","first-page":"1101","article-title":"Coordinating multi-agent reinforcement learning with limited communication","volume-title":"Proc. Int. Conf. Auto. Agents Multi-Agent Syst. (AAMAS)","author":"Zhang"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11492"},{"key":"ref49","article-title":"Learning to communicate to solve riddles with deep distributed recurrent Q-networks","author":"Foerster","year":"2016","journal-title":"arXiv:1602.02672"},{"key":"ref50","first-page":"14502","article-title":"Collaborating with humans without human data","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Strouse"},{"key":"ref51","article-title":"Multi-agent reinforcement learning in sequential social dilemmas","author":"Leibo","year":"2017","journal-title":"arXiv:1702.03037"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/s11235-025-01279-x"},{"key":"ref54","first-page":"15032","article-title":"PettingZoo: Gym for multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Terry"},{"key":"ref55","article-title":"Benchmarking multi-agent deep reinforcement learning algorithms in cooperative tasks","author":"Papoudakis","year":"2020","journal-title":"arXiv:2006.07869"},{"key":"ref56","volume-title":"Pettingzoo: Multi-Agent Reinforcement Learning Environments","author":"Terry","year":"2023"},{"key":"ref57","article-title":"OceanSim: A GPU-accelerated underwater robot perception simulation framework","author":"Song","year":"2025","journal-title":"arXiv:2503.01074"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.3390\/app10186393"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.5772\/24098"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3061308"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.4031\/002533208786861263"},{"key":"ref62","volume-title":"Multi-Agent Particle Environment","year":"2018"},{"key":"ref63","volume-title":"Uwsim: Underwater Simulator","year":"2015"},{"key":"ref64","volume-title":"Gazebo Simulation","year":"2024"},{"key":"ref65","volume-title":"UUV Simulator: Underwater Vehicle Simulator for Gazebo and Ros","year":"2024"},{"key":"ref66","volume-title":"UNAV-Sim: Unreal-Based Underwater Navigation Simulator","year":"2024"},{"key":"ref67","article-title":"Oceansim: GPU-accelerated synthetic underwater perception","author":"Wang","year":"2025","journal-title":"arXiv:2503.01074v1"},{"key":"ref68","volume-title":"Project Dave: Diverse Aquatic Virtual Environment","year":"2024"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.3390\/machines10070582"},{"key":"ref70","first-page":"2817","article-title":"Robust adversarial reinforcement learning","volume-title":"Proc. 34th Int. Conf. Mach. Learn. (ICML)","author":"Pinto"},{"key":"ref71","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Finn"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2008.2005605"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386025"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-025-58125-6"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/TMECH.2025.3603065"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.15837\/ijccc.2024.5.6457"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.3390\/s24237490"},{"issue":"32","key":"ref79","first-page":"1","article-title":"Heterogeneous-agent reinforcement learning","volume":"25","author":"Zhong","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref80","first-page":"1538","article-title":"TarMAC: Targeted multi-agent communication","volume-title":"Proc. 36th Int. Conf. Mach. Learn. (ICML)","author":"Das"},{"key":"ref81","article-title":"Learning to schedule communication in multi-agent reinforcement learning","author":"Kim","year":"2019","journal-title":"arXiv:1902.01554"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29700"},{"key":"ref83","article-title":"Trust region policy optimisation in multi-agent reinforcement learning","author":"Kuba","year":"2021","journal-title":"arXiv:2109.11251"},{"key":"ref84","article-title":"Generalization in cooperative multi-agent systems","author":"Mahajan","year":"2022","journal-title":"arXiv:2202.00104"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2009.4752682"},{"key":"ref86","first-page":"1","article-title":"A survey of underwater vehicle navigation: Recent advances and new challenges","volume-title":"Proc. IFAC Conf. Manoeuvring Control Mar. Craft","author":"Eustice"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2017.xiii.034"},{"key":"ref89","article-title":"Understanding domain randomization for sim-to-real transfer","author":"Chen","year":"2021","journal-title":"arXiv:2110.03239"},{"key":"ref90","first-page":"1","article-title":"Gradient episodic memory for continual learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"L\u00f3pez-Paz"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_33"},{"key":"ref92","article-title":"Don\u2019t forget, there is more than forgetting: New metrics for continual learning","author":"D\u00edaz-Rodr\u00edguez","year":"2018","journal-title":"arXiv:1810.13166"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/oceanskobe.2008.4531073"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00178"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.3390\/rs14174297"},{"key":"ref96","article-title":"Test-time training with self-supervision for generalization under distribution shifts","author":"Sun","year":"2019","journal-title":"arXiv:1909.13231"},{"key":"ref97","article-title":"Tent: Fully test-time adaptation by entropy minimization","author":"Wang","year":"2020","journal-title":"arXiv:2006.10726"},{"key":"ref98","article-title":"On pitfalls of test-time adaptation","author":"Zhao","year":"2023","journal-title":"arXiv:2306.03536"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2022.3149272"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.3390\/s24134421"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2021.3135688"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3281668"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.3390\/rs16122068"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2022.109455"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.3390\/s22186949"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.3390\/s23052622"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3052691"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.3390\/s23146417"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.13"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11162516.pdf?arnumber=11162516","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T17:38:33Z","timestamp":1758303513000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11162516\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":109,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3609457","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}