{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:05:33Z","timestamp":1775228733500,"version":"3.50.1"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100005693","name":"Scientific Research Fund Project of Hainan University","doi-asserted-by":"publisher","award":["KYQD(ZR)-21007"],"award-info":[{"award-number":["KYQD(ZR)-21007"]}],"id":[{"id":"10.13039\/501100005693","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004761","name":"Natural Science Foundation of Hainan Province","doi-asserted-by":"publisher","award":["621QN212"],"award-info":[{"award-number":["621QN212"]}],"id":[{"id":"10.13039\/501100004761","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100017944","name":"Hainan Provincial Natural Science Foundation of China","doi-asserted-by":"publisher","award":["622RC618"],"award-info":[{"award-number":["622RC618"]}],"id":[{"id":"10.13039\/100017944","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of 
China","doi-asserted-by":"publisher","award":["62062030"],"award-info":[{"award-number":["62062030"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"State Scholarship Fund, China Scholarship Council","doi-asserted-by":"publisher","award":["202207565036"],"award-info":[{"award-number":["202207565036"]}],"id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Intell. Transport. Syst."],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1109\/tits.2023.3296769","type":"journal-article","created":{"date-parts":[[2023,7,28]],"date-time":"2023-07-28T17:36:35Z","timestamp":1690565795000},"page":"14281-14293","source":"Crossref","is-referenced-by-count":41,"title":["Multi-Agent Reinforcement Learning With Policy Clipping and Average Evaluation for UAV-Assisted Communication Markov Game"],"prefix":"10.1109","volume":"24","author":[{"given":"Zikai","family":"Feng","sequence":"first","affiliation":[{"name":"School of Information and Communication Engineering, Hainan University, Haikou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8848-2566","authenticated-orcid":false,"given":"Mengxing","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Hainan University, Haikou, China"}]},{"given":"Di","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Hainan University, Haikou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1301-9870","authenticated-orcid":false,"given":"Edmond Q.","family":"Wu","sequence":"additional","affiliation":[{"name":"Department of Automation, Shanghai Jiao Tong University, Shanghai, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9307-2120","authenticated-orcid":false,"given":"Chau","family":"Yuen","sequence":"additional","affiliation":[{"name":"School of Electrical and Electronic Engineering, Nanyang Technological University, Jurong West, Singapore"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2021.3082576"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/1544447"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/s22093136"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2016.2611512"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2019.2947918"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2021.3049387"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2933417"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2017.2789293"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2890999"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2023.3240697"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.3042977"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2022.108857"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.01.061"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.03.029"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LNET.2020.2989130"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3049555"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s10723-020-09512-4"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.08.040"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.1900641"},{"key":"ref22","d
oi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref23","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Lillicrap"},{"key":"ref24","first-page":"1889","article-title":"Trust region policy optimization","volume":"37","author":"Schulman","year":"2015","journal-title":"Comput. Sci."},{"key":"ref25","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref26","article-title":"Simplified action decoder for deep multi-agent reinforcement learning","author":"Hu","year":"2019","journal-title":"arXiv:1912.02288"},{"key":"ref27","article-title":"AI-QMIX: Attention and imagination for dynamic multi-agent reinforcement learning","author":"Shariq","year":"2020","journal-title":"arXiv:2006.04222"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref29","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Christopher","year":"2019","journal-title":"arXiv:1912.06680"},{"key":"ref30","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"Lowe","year":"2017","journal-title":"arXiv:1706.02275"},{"key":"ref31","article-title":"The surprising effectiveness of MAPPO in cooperative","author":"Yu","year":"2021","journal-title":"arXiv:2111.01100"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/GLOBECOM42002.2020.9322567"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.3390\/s22010270"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2009.090404"},{"issue":"3","key":"ref35","first-page":"1","article-title":"MARL-based design of multi-unmanned aerial vehicle assisted communication system with hybrid gaming mode","volume":"43","author":"Wu","year":"2021","journal-title":"J. Electron. Inf. 
Technol."},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2021.3104139"},{"issue":"2","key":"ref38","doi-asserted-by":"crossref","first-page":"286","DOI":"10.2307\/1969529","article-title":"Non-cooperative games","volume":"54","author":"John","year":"1951","journal-title":"Ann. Math."},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.2307\/1911749"},{"key":"ref40","article-title":"Monotonic improvement guarantees under non-stationarity for decentralized PPO","author":"Sun","year":"2022","journal-title":"arXiv:2202.00082"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/WINCOM.2017.8238199"},{"key":"ref42","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv:1412.6980"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/glocom.2014.7037248"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2018.2816811"}],"container-title":["IEEE Transactions on Intelligent Transportation Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6979\/10339106\/10197291.pdf?arnumber=10197291","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T20:27:35Z","timestamp":1710361655000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10197291\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":44,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tits.2023.3296769","relation":{},"ISSN":["1524-9050","1558-0016"],"issn-type":[{"value":"1524-9050","type":"print"},{"value":"1558-0016","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12]]}}}