{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:38:22Z","timestamp":1776109102933,"version":"3.50.1"},"reference-count":31,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["3072024XX0602"],"award-info":[{"award-number":["3072024XX0602"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013804","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100013804","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.engappai.2026.114465","type":"journal-article","created":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T10:50:56Z","timestamp":1773831056000},"page":"114465","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Dynamic residual multi-stage replay policy gradient method for multi-agent cooperation and competition"],"prefix":"10.1016","volume":"174","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0281-0336","authenticated-orcid":false,"given":"Xingmei","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3161-7700","authenticated-orcid":false,"given":"Junzheng","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5817-6796","authenticated-orcid":false,"given":"Haotian","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5016-1721","authenticated-orcid":false,"given":"Zining","family":"Yan","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.114465_b1","series-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2015"},{"key":"10.1016\/j.engappai.2026.114465_b2","series-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2016"},{"key":"10.1016\/j.engappai.2026.114465_b3","series-title":"Prioritized sequence experience replay","author":"Brittain","year":"2019"},{"key":"10.1016\/j.engappai.2026.114465_b4","series-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.engappai.2026.114465_b5","series-title":"1999 Ninth International Conference on Artificial Neural Networks ICANN 99. (Conf. Publ. No. 470)","first-page":"850","article-title":"Learning to forget: continual prediction with LSTM","volume":"2","author":"Gers","year":"1999"},{"issue":"10","key":"10.1016\/j.engappai.2026.114465_b6","doi-asserted-by":"crossref","first-page":"2451","DOI":"10.1162\/089976600300015015","article-title":"Learning to forget: Continual prediction with LSTM","volume":"12","author":"Gers","year":"2000","journal-title":"Neural Comput."},{"key":"10.1016\/j.engappai.2026.114465_b7","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., et al., 2018. Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor. In: International Conference on Machine Learning. pp. 1861\u20131870."},{"key":"10.1016\/j.engappai.2026.114465_b8","doi-asserted-by":"crossref","unstructured":"Hado, V.H., Guez, A., Silver, D., 2016. Deep Reinforcement Learning with Double Q-learning. In: Proceedings of the 30th AAAI Conference on Artificial Intelligence. Phoenix, pp. 2094\u20132100.","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"10.1016\/j.engappai.2026.114465_b9","series-title":"CVPR","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.engappai.2026.114465_b10","doi-asserted-by":"crossref","unstructured":"Hessel, M., Modayil, M., Hasselt, V. H., et al., 2018. Rainbow: Combining Improvements in Deep Reinforcement Learning. In: Proceedings of the 32nd AAAI Conference on Artificial Intelligence. Orleans, pp. 3215\u20133222.","DOI":"10.1609\/aaai.v32i1.11796"},{"issue":"6","key":"10.1016\/j.engappai.2026.114465_b11","doi-asserted-by":"crossref","first-page":"120","DOI":"10.1109\/MWC.2017.1700363","article-title":"Stackelberg game approaches for anti-jamming defence in wireless networks","volume":"25","author":"Jia","year":"2018","journal-title":"IEEE Wirel. Commun."},{"key":"10.1016\/j.engappai.2026.114465_b12","article-title":"Distributed pursuit-evasion game of limited perception USV swarm based on multiagent proximal policy optimization","author":"Li","year":"2024","journal-title":"IEEE Trans. Syst. Man, Cybern.: Syst."},{"key":"10.1016\/j.engappai.2026.114465_b13","series-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015"},{"key":"10.1016\/j.engappai.2026.114465_b14","doi-asserted-by":"crossref","unstructured":"Littman, M.L., 1994. Markov Games as a Framework for Multi-Agent Reinforcement Learning. In: Proceedings of the Eleventh International Conference on Machine Learning. Vol. 157, pp. 157\u2013163.","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"10.1016\/j.engappai.2026.114465_b15","doi-asserted-by":"crossref","unstructured":"Liu, R., Zou, J., 2018. The Effects of Memory Replay in Reinforcement Learning. In: Annual Allerton Conference on Communication, Control, and Computing. pp. 478\u2013485.","DOI":"10.1109\/ALLERTON.2018.8636075"},{"key":"10.1016\/j.engappai.2026.114465_b16","unstructured":"Lowe, Ryan, Wu, Yi, Tamar, Aviv, Harb, Jean, Abbeel, Pieter, Mordatch, Igor, Multi-agent actor-critic for mixed cooperative-competitive environments. 30."},{"key":"10.1016\/j.engappai.2026.114465_b17","series-title":"MAC-PO: Multi-agent experience replay via collective priority optimization","first-page":"466","author":"Mei","year":"2023"},{"key":"10.1016\/j.engappai.2026.114465_b18","series-title":"Multi-agent cooperation through learning-aware policy gradients","author":"Meulemans","year":"2025"},{"key":"10.1016\/j.engappai.2026.114465_b19","series-title":"Asynchronous methods for deep reinforcement learning","author":"Mnih","year":"2016"},{"key":"10.1016\/j.engappai.2026.114465_b20","doi-asserted-by":"crossref","unstructured":"Qu, Xiuqing, Gan, Wenhao, Song, Dalei, Zhou, Liqin, Pursuit-evasion game strategy of USV Based on deep reinforcement learning in complex multi-obstacle environment 273 (2023).","DOI":"10.1016\/j.oceaneng.2023.114016"},{"key":"10.1016\/j.engappai.2026.114465_b21","series-title":"K-level policy gradients for multi-agent reinforcement learning","author":"Reddi","year":"2025"},{"key":"10.1016\/j.engappai.2026.114465_b22","series-title":"Prioritized experience replay","author":"Schaul","year":"2016"},{"key":"10.1016\/j.engappai.2026.114465_b23","series-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"10.1016\/j.engappai.2026.114465_b24","series-title":"Proceedings of the 40th International Conference on Machine Learning","article-title":"Complementary attention for multi-agent reinforcement learning","author":"Shao","year":"2023"},{"key":"10.1016\/j.engappai.2026.114465_b25","unstructured":"Vanseijen, H., Sutton, R, 2015. A Deeper Look at Planning as Learning from Replay. In: Proceedings of the International Conference on Machine Learning. pp. 2314\u20132322."},{"key":"10.1016\/j.engappai.2026.114465_b26","first-page":"279","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"10.1016\/j.engappai.2026.114465_b27","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1016\/j.oceaneng.2019.04.099","article-title":"Deep reinforcement learning-based controller for path following of an unmanned surface vehicle","volume":"183","author":"Woo","year":"2019","journal-title":"Ocean Eng."},{"key":"10.1016\/j.engappai.2026.114465_b28","doi-asserted-by":"crossref","DOI":"10.1016\/j.oceaneng.2024.118342","article-title":"Deep reinforcement learning with intrinsic curiosity module based trajectory tracking control for USV","volume":"308","author":"Wu","year":"2024","journal-title":"Ocean Eng."},{"key":"10.1016\/j.engappai.2026.114465_b29","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2025.107574","article-title":"Multi-agent self-attention reinforcement learning for multi-USV hunting target","author":"Xue","year":"2025","journal-title":"Neural Netw."},{"key":"10.1016\/j.engappai.2026.114465_b30","series-title":"Sample-efficient multiagent reinforcement learning with reset replay","author":"Yang","year":"2024"},{"key":"10.1016\/j.engappai.2026.114465_b31","series-title":"Experience replay optimization","first-page":"4243","author":"Zha","year":"2019"}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626007463?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626007463?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T18:08:21Z","timestamp":1776103701000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626007463"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":31,"alternative-id":["S0952197626007463"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114465","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Dynamic residual multi-stage replay policy gradient method for multi-agent cooperation and competition","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114465","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114465"}}