{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T16:04:24Z","timestamp":1772121864134,"version":"3.50.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"22","license":[{"start":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T00:00:00Z","timestamp":1725840000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T00:00:00Z","timestamp":1725840000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2022ZD0116401"],"award-info":[{"award-number":["2022ZD0116401"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62141605"],"award-info":[{"award-number":["62141605"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2024,11]]},"DOI":"10.1007\/s10489-024-05811-5","type":"journal-article","created":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T04:02:01Z","timestamp":1725854521000},"page":"12020-12034","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Uncertainty modified policy for multi-agent reinforcement learning"],"prefix":"10.1007","volume":"54","author":[{"given":"Xinyu","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Jianxiang","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1878-3547","authenticated-orcid":false,"given":"Faguo","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Guojian","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,9]]},"reference":[{"key":"5811_CR1","doi-asserted-by":"publisher","unstructured":"Li SE (2023) Deep Reinforcement Learning, pp. 365\u2013402. Springer, Singapore. https:\/\/doi.org\/10.1007\/978-981-19-7784-8_10","DOI":"10.1007\/978-981-19-7784-8_10"},{"key":"5811_CR2","unstructured":"Samvelyan M, Rashid T, Witt C, Farquhar G, Nardelli N, Rudner TG, Hung C-M, Torr PH, Foerster J, Whiteson S (2019) The starcraft multi-agent challenge. In: Proceedings of the 18th international conference on autonomous agents and multiagent systems, pp 2186\u20132188"},{"key":"5811_CR3","doi-asserted-by":"crossref","unstructured":"Jiang J, Lu Z (2023) Offline decentralized multi-agent reinforcement learning. In: ECAI, pp 1148\u20131155","DOI":"10.3233\/FAIA230390"},{"issue":"1","key":"5811_CR4","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1146\/annurev-control-042920-020211","volume":"5","author":"L Brunke","year":"2022","unstructured":"Brunke L, Greeff M, Hall AW, Yuan Z, Zhou S, Panerati J, Schoellig AP (2022) Safe learning in robotics: from learning-based control to safe reinforcement learning. Annual Review of Control, Robotics, and Autonomous Systems. 5(1):411\u2013444","journal-title":"Annual Review of Control, Robotics, and Autonomous Systems."},{"key":"5811_CR5","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1016\/j.inffus.2021.05.008","volume":"76","author":"M Abdar","year":"2021","unstructured":"Abdar M, Pourpanah F, Hussain S, Rezazadegan D, Liu L, Ghavamzadeh M, Fieguth P, Cao X, Khosravi A, Acharya UR et al (2021) A review of uncertainty quantification in deep learning: techniques, applications and challenges. Information Fusion. 76:243\u2013297","journal-title":"Information Fusion."},{"key":"5811_CR6","first-page":"14210","volume":"35","author":"C Wu","year":"2022","unstructured":"Wu C, Li T, Zhang Z, Yu Y (2022) Bayesian optimistic optimization: optimistic exploration for model-based reinforcement learning. Adv Neural Inf Process Syst 35:14210\u201314223","journal-title":"Adv Neural Inf Process Syst"},{"key":"5811_CR7","doi-asserted-by":"publisher","first-page":"7664","DOI":"10.1609\/aaai.v36i7.20733","volume":"36","author":"X Ma","year":"2022","unstructured":"Ma X, Isele D, Gupta JK, Fujimura K, Kochenderfer MJ (2022) Recursive reasoning graph for multi-agent reinforcement learning. Proceedings of the AAAI Conference on Artificial Intelligence 36:7664\u20137671","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"5811_CR8","unstructured":"Lowe R, Wu YI, Tamar A, Harb J, Pieter\u00a0Abbeel O, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. Advances in neural information processing systems 30"},{"key":"5811_CR9","doi-asserted-by":"crossref","unstructured":"Wang N, Das GP, Millard AG (2022) Learning cooperative behaviours in adversarial multi-agent systems. In: Annual conference towards autonomous robotic systems, pp 179\u2013189. Springer","DOI":"10.1007\/978-3-031-15908-4_15"},{"issue":"11","key":"5811_CR10","doi-asserted-by":"publisher","first-page":"4948","DOI":"10.3390\/app11114948","volume":"11","author":"L Canese","year":"2021","unstructured":"Canese L, Cardarilli GC, Di Nunzio L, Fazzolari R, Giardino D, Re M, Span\u00f2 S (2021) Multi-agent reinforcement learning: a review of challenges and applications. Appl Sci 11(11):4948","journal-title":"Appl Sci"},{"key":"5811_CR11","doi-asserted-by":"crossref","unstructured":"Chen L, Lei C (2021) Deep reinforcement learning. Deep Learning and Practice with MindSpore, 217\u2013243","DOI":"10.1007\/978-981-16-2233-5_10"},{"key":"5811_CR12","doi-asserted-by":"crossref","unstructured":"Thakur A, Konde A (2021) Fundamentals of neural networks. International Journal for Research in Applied Science and Engineering Technology. 9(VIII), 407\u2013426","DOI":"10.22214\/ijraset.2021.37362"},{"key":"5811_CR13","doi-asserted-by":"crossref","unstructured":"Xiao Z, Tong H, Qu R, Xing H, Luo S, Zhu Z, Song F, Feng L (2023) Capmatch: semi-supervised contrastive transformer capsule with feature-based knowledge distillation for human activity recognition. IEEE Transactions on Neural Networks and Learning Systems","DOI":"10.1109\/TNNLS.2023.3344294"},{"key":"5811_CR14","doi-asserted-by":"crossref","unstructured":"Foerster J, Farquhar G, Afouras T, Nardelli N, Whiteson S (2018) Counterfactual multi-agent policy gradients. In: Proceedings of the AAAI conference on artificial intelligence, vol 32","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"5811_CR15","unstructured":"Sunehag P, Lever G, Gruslys A, Czarnecki WM, Zambaldi V, Jaderberg M, Lanctot M, Sonnerat N, Leibo JZ, Tuyls K et al (2018) Value-decomposition networks for cooperative multi-agent learning based on team reward. In: Proceedings of the 17th international conference on autonomous agents and multiagent systems, pp 2085\u20132087"},{"issue":"1","key":"5811_CR16","first-page":"7234","volume":"21","author":"T Rashid","year":"2020","unstructured":"Rashid T, Samvelyan M, De Witt CS, Farquhar G, Foerster J, Whiteson S (2020) Monotonic value function factorisation for deep multi-agent reinforcement learning. The Journal of Machine Learning Research. 21(1):7234\u20137284","journal-title":"The Journal of Machine Learning Research."},{"issue":"9","key":"5811_CR17","first-page":"1941","volume":"34","author":"J Luo","year":"2022","unstructured":"Luo J, Zhang W, Yuan W, Hu Z, Chen S, Chen J (2022) Research on opponent modeling framework for multi-agent game confrontation. Journal of System Simulation. 34(9):1941\u20131955","journal-title":"Journal of System Simulation."},{"key":"5811_CR18","unstructured":"Dai Z, Chen Y, Low BKH, Jaillet P, Ho T-H (2020) R2-b2: Recursive reasoning-based bayesian optimization for no-regret learning in games. In: International conference on machine learning, pp 2291\u20132301. PMLR"},{"issue":"3","key":"5811_CR19","doi-asserted-by":"publisher","first-page":"1622","DOI":"10.1287\/mnsc.2020.3595","volume":"67","author":"T-H Ho","year":"2021","unstructured":"Ho T-H, Park S-E, Su X (2021) A bayesian level-k model in n-person games. Manage Sci 67(3):1622\u20131638","journal-title":"Manage Sci"},{"issue":"3","key":"5811_CR20","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1007\/s10994-021-05946-3","volume":"110","author":"E H\u00fcllermeier","year":"2021","unstructured":"H\u00fcllermeier E, Waegeman W (2021) Aleatoric and epistemic uncertainty in machine learning: an introduction to concepts and methods. Mach Learn 110(3):457\u2013506","journal-title":"Mach Learn"},{"key":"5811_CR21","unstructured":"Fujimoto S, Meger D, Precup D (2019) Off-policy deep reinforcement learning without exploration. In: International conference on machine learning, pp 2052\u20132062. PMLR"},{"key":"5811_CR22","unstructured":"Wu Y, Zhai S, Srivastava N, Susskind JM, Zhang J, Salakhutdinov R, Goh H (2021) Uncertainty weighted actor-critic for offline reinforcement learning. In: International conference on machine learning, pp 11319\u201311328. PMLR"},{"key":"5811_CR23","unstructured":"Yu R, Liu S, Wang X (2023) Dataset distillation: a comprehensive review. IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"5811_CR24","doi-asserted-by":"publisher","first-page":"3507","DOI":"10.1109\/TIP.2023.3286254","volume":"32","author":"Z Tu","year":"2023","unstructured":"Tu Z, Liu Y, Zhang Y, Mu Q, Yuan J (2023) Dtcm: joint optimization of dark enhancement and action recognition in videos. IEEE Trans Image Process 32:3507\u20133520","journal-title":"IEEE Trans Image Process"},{"key":"5811_CR25","first-page":"226","volume":"35","author":"W-C Tseng","year":"2022","unstructured":"Tseng W-C, Wang T-HJ, Lin Y-C, Isola P (2022) Offline multi-agent reinforcement learning with knowledge distillation. Adv Neural Inf Process Syst 35:226\u2013237","journal-title":"Adv Neural Inf Process Syst"},{"key":"5811_CR26","unstructured":"O\u2019Donoghue B, Osband I, Munos R, Mnih V (2018) The uncertainty bellman equation and exploration. In: International conference on machine learning, pp 3836\u20133845"},{"key":"5811_CR27","doi-asserted-by":"crossref","unstructured":"Wu J, Huang Z, Lv C (2022) Uncertainty-aware model-based reinforcement learning: methodology and application in autonomous driving. IEEE Transactions on Intelligent Vehicles. 8(1):194\u2013203","DOI":"10.1109\/TIV.2022.3185159"},{"key":"5811_CR28","doi-asserted-by":"crossref","unstructured":"L\u00fctjens B, Everett M, How JP (2019) Safe reinforcement learning with model uncertainty estimates. In: 2019 International Conference on Robotics and Automation (ICRA), pp 8662\u20138668. IEEE","DOI":"10.1109\/ICRA.2019.8793611"},{"key":"5811_CR29","unstructured":"Sun W-F, Lee, C-K, Lee C-Y (2021) Dfac framework: factorizing the value function via quantile mixture for multi-agent distributional q-learning. In: International conference on machine learning, pp 9945\u20139954. PMLR"},{"key":"5811_CR30","doi-asserted-by":"crossref","unstructured":"Zhao J, Yang M, Zhao Y, Hu X, Zhou W, Li H (2023) Mcmarl: parameterizing value function via mixture of categorical distributions for multi-agent reinforcement learning. IEEE Transactions on Games","DOI":"10.1109\/TG.2023.3310150"},{"key":"5811_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.119217","volume":"644","author":"Q Liu","year":"2023","unstructured":"Liu Q, Li Y, Chen S, Lin K, Shi X, Lou Y (2023) Distributional reinforcement learning with epistemic and aleatoric uncertainty estimation. Inf Sci 644:119217","journal-title":"Inf Sci"},{"key":"5811_CR32","doi-asserted-by":"crossref","unstructured":"Shen M, How JP (2021) Robust opponent modeling via adversarial ensemble reinforcement learning. Proceedings of the International Conference on Automated Planning and Scheduling 31:578\u2013587","DOI":"10.1609\/icaps.v31i1.16006"},{"key":"5811_CR33","unstructured":"He S, Han S, Su S, Han S, Zou S, Miao F (2023) Robust multi-agent reinforcement learning with state uncertainty. Transactions on Machine Learning Research"},{"key":"5811_CR34","doi-asserted-by":"crossref","unstructured":"Fan T, Long P, Liu W, Pan J (2020) Distributed multi-robot collision avoidance via deep reinforcement learning for navigation in complex scenarios. The International Journal of Robotics Research. 39(7):856\u2013892","DOI":"10.1177\/0278364920916531"},{"key":"5811_CR35","unstructured":"Wen Y, Yang Y, Luo R, Wang J, Pan W (2019) Probabilistic recursive reasoning for multi-agent reinforcement learning. In: 7th International Conference on Learning Representations, ICLR 2019"},{"key":"5811_CR36","doi-asserted-by":"crossref","unstructured":"Wen Y, Yang Y, Wang J (2021) Modelling bounded rationality in multi-agent interactions by generalized recursive reasoning. In: Proceedings of the twenty-ninth international conference on international joint conferences on artificial intelligence, pp 414\u2013421","DOI":"10.24963\/ijcai.2020\/58"},{"key":"5811_CR37","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1016\/j.neucom.2022.10.045","volume":"517","author":"Y Tian","year":"2023","unstructured":"Tian Y, Kladny K-R, Wang Q, Huang Z, Fink O (2023) Multi-agent actor-critic with time dynamical opponent model. Neurocomputing 517:165\u2013172","journal-title":"Neurocomputing"},{"key":"5811_CR38","first-page":"24611","volume":"35","author":"C Yu","year":"2022","unstructured":"Yu C, Velu A, Vinitsky E, Gao J, Wang Y, Bayen A, Wu Y (2022) The surprising effectiveness of ppo in cooperative multi-agent games. Adv Neural Inf Process Syst 35:24611\u201324624","journal-title":"Adv Neural Inf Process Syst"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05811-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-05811-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05811-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T15:32:33Z","timestamp":1726673553000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-05811-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,9]]},"references-count":38,"journal-issue":{"issue":"22","published-print":{"date-parts":[[2024,11]]}},"alternative-id":["5811"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-05811-5","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,9]]},"assertion":[{"value":"22 August 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 September 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}},{"value":"The authors confirm their awareness that the data utilized in this article does not raise any ethical concerns.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical and Informed Consent"}}]}}