{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T19:00:41Z","timestamp":1773774041489,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T00:00:00Z","timestamp":1773360000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T00:00:00Z","timestamp":1773360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s11432-025-4808-x","type":"journal-article","created":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T14:48:38Z","timestamp":1773758918000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Robust model-based MARL via masked cross-agent completion under observation loss"],"prefix":"10.1007","volume":"69","author":[{"given":"Zifeng","family":"Shi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meiqin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jian","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ronghao","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shanling","family":"Dong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,3,13]]},"reference":[{"key":"4808_CR1","doi-asserted-by":"publisher","first-page":"180204","DOI":"10.1007\/s11432-023-4088-2","volume":"67","author":"Z Q Zheng","year":"2024","unstructured":"Zheng Z Q, Wei C, Duan H B. UAV swarm air combat maneuver decision-making method based on multi-agent reinforcement learning and transferring. Sci China Inf Sci, 2024, 67: 180204","journal-title":"Sci China Inf Sci"},{"key":"4808_CR2","first-page":"2017","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"L Matignon","year":"2022","unstructured":"Matignon L, Jeanpierre L, Mouaddib A I. Coordinated multi-robot exploration under communication constraints using decentralized Markov decision processes. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2022. 2017\u20132023"},{"key":"4808_CR3","doi-asserted-by":"publisher","first-page":"186","DOI":"10.1109\/TCYB.2015.2509646","volume":"47","author":"S M Hung","year":"2017","unstructured":"Hung S M, Givigi S N. A Q-learning approach to flocking with UAVs in a stochastic environment. IEEE Trans Cybern, 2017, 47: 186\u2013197","journal-title":"IEEE Trans Cybern"},{"key":"4808_CR4","doi-asserted-by":"publisher","first-page":"129005","DOI":"10.1016\/j.neucom.2024.129005","volume":"617","author":"M T Ramezanlou","year":"2025","unstructured":"Ramezanlou M T, Schwartz H, Lambadaris I, et al. Enhancing cooperative multi-agent reinforcement learning through the integration of R-STDP and federated learning. Neurocomputing, 2025, 617: 129005","journal-title":"Neurocomputing"},{"key":"4808_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.robot.2019.01.003","volume":"114","author":"C You","year":"2019","unstructured":"You C, Lu J, Filev D, et al. Advanced planning for autonomous vehicles using reinforcement learning and deep inverse reinforcement learning. Robot Autonomous Syst, 2019, 114: 1\u201318","journal-title":"Robot Autonomous Syst"},{"key":"4808_CR6","doi-asserted-by":"publisher","first-page":"128482","DOI":"10.1016\/j.neucom.2024.128482","volume":"609","author":"H Gao","year":"2024","unstructured":"Gao H, Zhao M, Zheng X, et al. An improved hierarchical deep reinforcement learning algorithm for multi-intelligent vehicle lane change. Neurocomputing, 2024, 609: 128482","journal-title":"Neurocomputing"},{"key":"4808_CR7","first-page":"1","volume":"21","author":"T Rashid","year":"2020","unstructured":"Rashid T, Samvelyan M, De Witt C S, et al. Monotonic value function factorisation for deep multi-agent reinforcement learning. J Mach Learn Res, 2020, 21: 1\u201351","journal-title":"J Mach Learn Res"},{"key":"4808_CR8","volume-title":"Proceedings of International Conference on Learning Representations","author":"B Baker","year":"2020","unstructured":"Baker B, Kanitscheider I, Markov T, et al. Emergent tool use from multi-agent autocurricula. In: Proceedings of International Conference on Learning Representations, 2020"},{"key":"4808_CR9","first-page":"6672","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"D Ye","year":"2020","unstructured":"Ye D, Liu Z, Sun M, et al. Mastering complex control in MOBA games with deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2020. 6672\u20136679"},{"key":"4808_CR10","doi-asserted-by":"publisher","first-page":"895","DOI":"10.1007\/s10462-021-09996-w","volume":"55","author":"S Gronauer","year":"2022","unstructured":"Gronauer S, Diepold K. Multi-agent deep reinforcement learning: a survey. Artif Intell Rev, 2022, 55: 895\u2013943","journal-title":"Artif Intell Rev"},{"key":"4808_CR11","doi-asserted-by":"publisher","first-page":"128068","DOI":"10.1016\/j.neucom.2024.128068","volume":"599","author":"K Hu","year":"2024","unstructured":"Hu K, Li M, Song Z, et al. A review of research on reinforcement learning algorithms for multi-agents. Neurocomputing, 2024, 599: 128068","journal-title":"Neurocomputing"},{"key":"4808_CR12","doi-asserted-by":"publisher","first-page":"121101","DOI":"10.1007\/s11432-022-3696-5","volume":"67","author":"F M Luo","year":"2024","unstructured":"Luo F M, Xu T, Lai H, et al. A survey on model-based reinforcement learning. Sci China Inf Sci, 2024, 67: 121101","journal-title":"Sci China Inf Sci"},{"key":"4808_CR13","volume-title":"Proceedings of International Conference on Learning Representations","author":"D Hafner","year":"2020","unstructured":"Hafner D, Lillicrap T P, Ba J, et al. Dream to control: learning behaviors by latent imagination. In: Proceedings of International Conference on Learning Representations, 2020"},{"key":"4808_CR14","first-page":"12519","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"M Janner","year":"2019","unstructured":"Janner M, Fu J, Zhang M, et al. When to trust your model: model-based policy optimization. In: Proceedings of Advances in Neural Information Processing Systems, 2019. 12519\u201312530"},{"key":"4808_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000086","volume":"16","author":"T M Moerland","year":"2023","unstructured":"Moerland T M, Broekens J, Plaat A, et al. Model-based reinforcement learning: a survey. Found Trends Mach Learn, 2023, 16: 1\u2013118","journal-title":"Found Trends Mach Learn"},{"key":"4808_CR16","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1016\/j.neucom.2023.01.076","volume":"530","author":"P Malekzadeh","year":"2023","unstructured":"Malekzadeh P, Hou M, Plataniotis K N. Uncertainty-aware transfer across tasks using hybrid model-based successor feature reinforcement learning. Neurocomputing, 2023, 530: 165\u2013187","journal-title":"Neurocomputing"},{"key":"4808_CR17","first-page":"776","volume-title":"Proceedings of Conference on Robot Learning","author":"O Krupnik","year":"2020","unstructured":"Krupnik O, Mordatch I, Tamar A. Multi-agent reinforcement learning with multi-step generative models. In: Proceedings of Conference on Robot Learning, 2020. 776\u2013790"},{"key":"4808_CR18","first-page":"381","volume-title":"Proceedings of the International Conference on Autonomous Agents and Multiagent Systems","author":"V Egorov","year":"2022","unstructured":"Egorov V, Shpilman A. Scalable multi-agent model-based reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems, 2022. 381\u2013390"},{"key":"4808_CR19","first-page":"10435","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"Z Wu","year":"2023","unstructured":"Wu Z, Yu C, Chen C, et al. Models as agents: optimizing multi-step predictions of interactive local models in model-based multi-agent reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2023. 10435\u201310443"},{"key":"4808_CR20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8","volume-title":"A Concise Introduction to Decentralized POMDPs","author":"F A Oliehoek","year":"2016","unstructured":"Oliehoek F A, Amato C. A Concise Introduction to Decentralized POMDPs. Berlin: Springer, 2016"},{"key":"4808_CR21","doi-asserted-by":"publisher","first-page":"043126","DOI":"10.1063\/1.4768663","volume":"22","author":"Y Sun","year":"2012","unstructured":"Sun Y, Li W, Zhao D. Convergence time and speed of multi-agent systems in noisy environments. Chaos, 2012, 22: 043126","journal-title":"Chaos"},{"key":"4808_CR22","unstructured":"Chen B, Xu M, Liu Z, et al. Delay-aware multi-agent reinforcement learning for cooperative and competitive environments. 2020. ArXiv:2005.05441"},{"key":"4808_CR23","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1007\/s13042-024-02170-y","volume":"16","author":"J Gao","year":"2024","unstructured":"Gao J, Wang S, Wang X, et al. Reinforcement learning for multi-agent with asynchronous missing information fusion method. Int J Mach Learn Cybernet, 2024, 16: 75\u201391","journal-title":"Int J Mach Learn Cybernet"},{"key":"4808_CR24","unstructured":"Shi Z, Liu M, Zhang S, et al. RMIO: a model-based marl framework for scenarios with observation loss in some agents. 2024. ArXiv:2411.19639"},{"key":"4808_CR25","unstructured":"Shi Z, Liu M, Zhang S, et al. GAWM: global-aware world model for multi-agent reinforcement learning. 2025. ArXiv:2501.10116"},{"key":"4808_CR26","first-page":"2186","volume-title":"Proceedings of the International Conference on Autonomous Agents and MultiAgent Systems","author":"M Samvelyan","year":"2019","unstructured":"Samvelyan M, Rashid T, Schroeder de Witt C, et al. The StarCraft multi-agent challenge. In: Proceedings of the International Conference on Autonomous Agents and MultiAgent Systems, 2019. 2186\u20132188"},{"key":"4808_CR27","first-page":"12208","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"B Peng","year":"2021","unstructured":"Peng B, Rashid T, Schroeder de Witt C, et al. FACMAC: factored multi-agent centralised policy gradients. In: Proceedings of Advances in Neural Information Processing Systems, 2021. 12208\u201312221"},{"key":"4808_CR28","unstructured":"Feinberg V, Wan A, Stoica I, et al. Model-based value estimation for efficient model-free reinforcement learning. 2018. ArXiv:1803.00101"},{"key":"4808_CR29","first-page":"463","volume-title":"Proceedings of International Conference on Machine Learning","author":"A Ayoub","year":"2020","unstructured":"Ayoub A, Jia Z, Szepesvari C, et al. Model-based reinforcement learning with value-targeted regression. In: Proceedings of International Conference on Machine Learning, 2020. 463\u2013474"},{"key":"4808_CR30","volume-title":"Proceedings of International Conference on Learning Representations","author":"D Hafner","year":"2021","unstructured":"Hafner D, Lillicrap T, Norouzi M, et al. Mastering Atari with discrete world models. In: Proceedings of International Conference on Learning Representations, 2021"},{"key":"4808_CR31","doi-asserted-by":"publisher","first-page":"647","DOI":"10.1038\/s41586-025-08744-2","volume":"640","author":"D Hafner","year":"2025","unstructured":"Hafner D, Pasukonis J, Ba J, et al. Mastering diverse control tasks through world models. Nature, 2025, 640: 647\u2013653","journal-title":"Nature"},{"key":"4808_CR32","volume-title":"Proceedings of International Conference on Learning Representations","author":"V Micheli","year":"2023","unstructured":"Micheli V, Alonso E, Fleuret F. Transformers are sample-efficient world models. In: Proceedings of International Conference on Learning Representations, 2023"},{"key":"4808_CR33","first-page":"27147","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"W Zhang","year":"2024","unstructured":"Zhang W, Wang G, Sun J, et al. Storm: efficient stochastic transformer based world models for reinforcement learning. In: Proceedings of Advances in Neural Information Processing Systems, 2024. 27147\u201327166"},{"key":"4808_CR34","volume-title":"Proceedings of International Conference on Learning Representations","author":"J Robine","year":"2023","unstructured":"Robine J, Hoftmann M, Uelwer T, et al. Transformer-based world models are happy with 100k interactions. In: Proceedings of International Conference on Learning Representations, 2023"},{"key":"4808_CR35","first-page":"6000","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need. In: Proceedings of Advances in Neural Information Processing Systems, 2017. 6000\u20136010"},{"key":"4808_CR36","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"M G Bellemare","year":"2013","unstructured":"Bellemare M G, Naddaf Y, Veness J, et al. The arcade learning environment: an evaluation platform for general agents. J Artif Intell Res, 2013, 47: 253\u2013279","journal-title":"J Artif Intell Res"},{"key":"4808_CR37","first-page":"1865","volume-title":"Proceedings of the 23rd International Conference on Autonomous Agents and Multiagent Systems","author":"A Venugopal","year":"2024","unstructured":"Venugopal A, Milani S, Fang F, et al. MABL: bi-level latent-variable world model for sample-efficient multi-agent reinforcement learning. In: Proceedings of the 23rd International Conference on Autonomous Agents and Multiagent Systems, 2024. 1865\u20131873"},{"key":"4808_CR38","doi-asserted-by":"publisher","first-page":"142102","DOI":"10.1007\/s11432-023-3853-y","volume":"67","author":"L Yuan","year":"2024","unstructured":"Yuan L, Jiang T, Li L H, et al. Robust cooperative multi-agent reinforcement learning via multi-view message certification. Sci China Inf Sci, 2024, 67: 142102","journal-title":"Sci China Inf Sci"},{"key":"4808_CR39","doi-asserted-by":"publisher","first-page":"152104","DOI":"10.1007\/s11432-021-3688-y","volume":"67","author":"Y Qiaoben","year":"2024","unstructured":"Qiaoben Y, Ying C Y, Zhou X N, et al. Understanding adversarial attacks on observations in deep reinforcement learning. Sci China Inf Sci, 2024, 67: 152104","journal-title":"Sci China Inf Sci"},{"key":"4808_CR40","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/j.neucom.2021.04.015","volume":"450","author":"B Chen","year":"2021","unstructured":"Chen B, Xu M, Li L, et al. Delay-aware model-based reinforcement learning for continuous control. Neurocomputing, 2021, 450: 119\u2013128","journal-title":"Neurocomputing"},{"key":"4808_CR41","unstructured":"Fu S, Chen S, Zhao S, et al. Rainbow delay compensation: a multi-agent reinforcement learning framework for mitigating delayed observation. 2025. ArXiv:2505.03586"},{"key":"4808_CR42","first-page":"6202","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"J Wang","year":"2020","unstructured":"Wang J, Liu Y, Li B. Reinforcement learning with perturbed rewards. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2020. 6202\u20136209"},{"key":"4808_CR43","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1016\/j.patrec.2021.06.022","volume":"150","author":"M Agarwal","year":"2021","unstructured":"Agarwal M, Aggarwal V. Blind decision making: reinforcement learning with delayed observations. Pattern Recognition Lett, 2021, 150: 176\u2013182","journal-title":"Pattern Recognition Lett"},{"key":"4808_CR44","first-page":"46390","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"M Chen","year":"2023","unstructured":"Chen M, Bai Y, Poor H V, et al. Efficient RL with impaired observability: learning to act with delayed and missing state observations. In: Proceedings of Advances in Neural Information Processing Systems, 2023. 46390\u201346418"},{"key":"4808_CR45","unstructured":"Karamzade A, Kim K, Kalsi M, et al. Reinforcement learning from delayed observations via world models. 2024. ArXiv:2403.12309"},{"key":"4808_CR46","first-page":"784","volume-title":"Proceedings of International Conference on Intelligent Transportation Systems","author":"J Liu","year":"2024","unstructured":"Liu J, Wang Z, Hang P, et al. Delay-aware multi-agent reinforcement learning for cooperative adaptive cruise control with model-based stability enhancement. In: Proceedings of International Conference on Intelligent Transportation Systems, 2024. 784\u2013791"},{"key":"4808_CR47","first-page":"1724","volume-title":"Proceedings of the Conference on Empirical Methods in Natural Language Processing","author":"K Cho","year":"2014","unstructured":"Cho K, van Merrienboer B, Gulcehre C, et al. Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, 2014. 1724\u20131734"},{"key":"4808_CR48","volume-title":"Proceedings of International Conference on Learning Representations","author":"V Lee","year":"2024","unstructured":"Lee V, Abbeel P, Lee Y. Dreamsmooth: improving model-based reinforcement learning via reward smoothing. In: Proceedings of International Conference on Learning Representations, 2024"},{"key":"4808_CR49","first-page":"24611","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"C Yu","year":"2022","unstructured":"Yu C, Velu A, Vinitsky E, et al. The surprising effectiveness of PPO in cooperative multi-agent games. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 24611\u201324624"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-025-4808-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-025-4808-x","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-025-4808-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T16:10:16Z","timestamp":1773763816000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-025-4808-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,13]]},"references-count":49,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["4808"],"URL":"https:\/\/doi.org\/10.1007\/s11432-025-4808-x","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,13]]},"assertion":[{"value":"8 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 September 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 October 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"142204"}}