{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T07:43:56Z","timestamp":1769586236135,"version":"3.49.0"},"reference-count":41,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Pre-Research Project on Civil Aerospace Technologies of China National Space Administration","award":["D010301"],"award-info":[{"award-number":["D010301"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Internet Things J."],"published-print":{"date-parts":[[2026,2,1]]},"DOI":"10.1109\/jiot.2025.3636204","type":"journal-article","created":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T19:01:29Z","timestamp":1764010889000},"page":"4668-4682","source":"Crossref","is-referenced-by-count":0,"title":["A Two-Layered Reinforcement Learning Framework for AoI-Aware Trajectory Planning and Scheduling Optimization in Multi-UAV Networks"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-3448-2981","authenticated-orcid":false,"given":"Kang","family":"Fu","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6955-4170","authenticated-orcid":false,"given":"Qingjie","family":"Zhao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Beijing Institute of Technology, Beijing, China"}]},{"given":"Lei","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Institute of Control Engineering, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3521496"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3285942"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2025.3578613"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2025.3553285"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2024.3458194"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2024.3522249"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3189214"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3345280"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3373411"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2018.2871079"},{"key":"ref11","first-page":"1889","article-title":"Trust region policy optimization","author":"Schulman","year":"2015","journal-title":"Proc. Int. Conf. Mach. Learn."},{"key":"ref12","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref14","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"key":"ref15","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref16","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Lowe"},{"key":"ref17","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref18","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"Sunehag","year":"2017","journal-title":"arXiv:1706.05296"},{"key":"ref19","first-page":"2961","article-title":"Actor-attention-critic for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Iqbal"},{"key":"ref20","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative, multi-agent games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.3004162"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2024.3402740"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2025.3543042"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2022.3232366"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2024.3461333"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN60899.2024.10650175"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2025.3540508"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3474926"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/tgcn.2025.3542611"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2023.3345395"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2025.3525566"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2955732"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2023.3289172"},{"key":"ref34","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2015","journal-title":"arXiv:1506.02438"},{"key":"ref35","first-page":"1","article-title":"Order matters: Agent-by-agent policy optimization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.cor.2021.105692"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1126\/science.153.3731.34"},{"key":"ref38","first-page":"31226","article-title":"Learning generalizable models for vehicle routing problems via knowledge distillation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Bi"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3299904.3340307"},{"key":"ref40","article-title":"Is independent learning all you need in the StarCraft multi-agent challenge?","author":"Schroeder de Witt","year":"2020","journal-title":"arXiv:2011.09533"},{"key":"ref41","first-page":"16509","article-title":"Multi-agent reinforcement learning is a sequence modeling problem","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wen"}],"container-title":["IEEE Internet of Things Journal"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6488907\/11363065\/11265756.pdf?arnumber=11265756","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T06:01:51Z","timestamp":1769493711000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11265756\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,1]]},"references-count":41,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/jiot.2025.3636204","relation":{},"ISSN":["2327-4662","2372-2541"],"issn-type":[{"value":"2327-4662","type":"electronic"},{"value":"2372-2541","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,1]]}}}