{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T12:05:20Z","timestamp":1777982720601,"version":"3.51.4"},"reference-count":92,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Quantum Mach. Intell."],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s42484-026-00361-0","type":"journal-article","created":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T06:50:47Z","timestamp":1770101447000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Q-CMAPO: A quantum-classical framework for balancing exploration and exploitation in multi-agent reinforcement learning"],"prefix":"10.1007","volume":"8","author":[{"given":"Mazyar","family":"Taghavi","sequence":"first","affiliation":[]},{"given":"Javad","family":"Vahidi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,3]]},"reference":[{"key":"361_CR1","doi-asserted-by":"crossref","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: A survey. Int J Robot Res 32(11):1238\u20131274","DOI":"10.1177\/0278364913495721"},{"key":"361_CR2","doi-asserted-by":"crossref","unstructured":"Sallab AE, Abdou M, Perot E, Yogamani S (2017) Deep reinforcement learning framework for autonomous driving. Electron Imaging 2017(19):70\u201376","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"361_CR3","first-page":"39","volume":"23","author":"S Agrawal","year":"2012","unstructured":"Agrawal S, Goyal N (2012) Analysis of thompson sampling for the multi-armed bandit problem. J Mach Learn Res 23:39\u20131","journal-title":"J Mach Learn Res"},{"key":"361_CR4","first-page":"397","volume":"3","author":"P Auer","year":"2002","unstructured":"Auer P (2002) Using confidence bounds for exploitation-exploration trade-offs. J Mach Learn Res 3:397\u2013422","journal-title":"J Mach Learn Res"},{"issue":"10","key":"361_CR5","first-page":"106","volume":"62","author":"MM Azari","year":"2024","unstructured":"Azari MM et al (2024) Quantum multi-agent reinforcement learning is all you need: Coordinated global access in integrated tn\/ntn cube-satellite networks. IEEE Commun Mag 62(10):106\u2013112","journal-title":"IEEE Commun Mag"},{"key":"361_CR6","volume-title":"Distributed Consensus in Multi-vehicle Cooperative Control","author":"RW Beard","year":"2012","unstructured":"Beard RW, McLain TW (2012) Distributed Consensus in Multi-vehicle Cooperative Control. Springer, London"},{"issue":"1","key":"361_CR7","first-page":"15","volume":"4","author":"M Benedetti","year":"2022","unstructured":"Benedetti M, Fingerhuth L (2022) Hybrid quantum-classical approaches for reinforcement learning. Quantum Machine Learning 4(1):15\u201328","journal-title":"Quantum Machine Learning"},{"issue":"1","key":"361_CR8","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1111\/j.2517-6161.1995.tb02031.x","volume":"57","author":"Y Benjamini","year":"1995","unstructured":"Benjamini Y, Hochberg Y (1995) Controlling the false discovery rate: A practical and powerful approach to multiple testing. J Roy Stat Soc B 57(1):289\u2013300","journal-title":"J Roy Stat Soc B"},{"issue":"4","key":"361_CR9","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"DS Bernstein","year":"2002","unstructured":"Bernstein DS, Givan R, Immerman N, Zilberstein S (2002) The complexity of decentralized control of markov decision processes. Math Oper Res 27(4):819\u2013840","journal-title":"Math Oper Res"},{"issue":"7671","key":"361_CR10","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1038\/nature23474","volume":"549","author":"J Biamonte","year":"2017","unstructured":"Biamonte J, Wittek P, Pancotti N, Rebentrost P, Wiebe N, Lloyd S (2017) Quantum machine learning. Nature 549(7671):195\u2013202","journal-title":"Nature"},{"key":"361_CR11","unstructured":"Li S, Wu Y, Cui X, Dong H, Fang F, Russell S (2017) Collaborative deep reinforcement learning for multi-agent problems. In: Proceedings of the 16th International conference on autonomous agents and multiagent systems, pp 1623\u20131624"},{"key":"361_CR12","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: International conference on learning representations"},{"issue":"4738","key":"361_CR13","doi-asserted-by":"publisher","first-page":"555","DOI":"10.1126\/science.231.4738.555","volume":"231","author":"DM Ceperley","year":"1986","unstructured":"Ceperley DM, Alder BJ (1986) Quantum monte carlo. Science 231(4738):555\u2013560","journal-title":"Science"},{"issue":"86","key":"361_CR14","first-page":"5149","volume":"8","author":"F Chalumeau","year":"2023","unstructured":"Chalumeau F, Lim B, Boige R, Grillotti L, Flageat M, Mac\u2019e V, Pierrot T, Cully A (2023) Qdax: A library for quality-diversity and population-based algorithms with hardware acceleration. Journal of Open Source Software 8(86):5149","journal-title":"Journal of Open Source Software"},{"key":"361_CR15","doi-asserted-by":"crossref","unstructured":"Ceperley DM, Alder BJ (1986) Quantum monte carlo. Sci 231(4738):555\u2013560","DOI":"10.1126\/science.231.4738.555"},{"issue":"4","key":"361_CR16","doi-asserted-by":"publisher","first-page":"837","DOI":"10.1109\/TRO.2018.2857475","volume":"34","author":"S-J Chung","year":"2018","unstructured":"Chung S-J, Paranjape AA, Dames P, Shen S, Kumar V (2018) A survey on aerial swarm robotics. IEEE Trans Rob 34(4):837\u2013855","journal-title":"IEEE Trans Rob"},{"key":"361_CR17","unstructured":"Nielsen MA, Chuang IL (2010) Quantum Computation and Quantum Information. Cambridge University Press"},{"key":"361_CR18","unstructured":"Duan Y, Chen X, Houthooft R, Schulman J, Abbeel P (2016) Benchmarking deep reinforcement learning for continuous control. In: International conference on machine learning, pp 1329\u20131338. PMLR"},{"key":"361_CR19","doi-asserted-by":"crossref","unstructured":"Ventura D, Martinez T (2000) Quantum associative memory. Inform Sci 124(1-4):273\u2013296","DOI":"10.1016\/S0020-0255(99)00101-2"},{"key":"361_CR20","doi-asserted-by":"crossref","unstructured":"Horodecki R, Horodecki P, Horodecki M, Horodecki K (2009) Quantum entanglement. Rev Modern Phys 81(2):865","DOI":"10.1103\/RevModPhys.81.865"},{"key":"361_CR21","doi-asserted-by":"publisher","first-page":"79","DOI":"10.22331\/q-2018-08-06-79","volume":"2","author":"J Preskill","year":"2018","unstructured":"Preskill J (2018) Quantum computing in the nisq era and beyond. Quantum 2:79","journal-title":"Quantum"},{"issue":"2","key":"361_CR22","first-page":"250","volume":"21","author":"H Farghadani","year":"2023","unstructured":"Farghadani H, Mirzaei M (2023) Quantum-enhanced multi-agent reinforcement learning. Int J Quantum Inf 21(2):250\u2013265","journal-title":"Int J Quantum Inf"},{"issue":"6","key":"361_CR23","first-page":"103","volume":"55","author":"S Liu","year":"2022","unstructured":"Liu S, Zhang Z (2022) A survey on multi-agent q-learning: Algorithms and applications. J Mach Learn 55(6):103\u2013120","journal-title":"J Mach Learn"},{"key":"361_CR24","doi-asserted-by":"crossref","unstructured":"Hsu H-L, Wang W, Pajic M, Xu P (2024) Randomized exploration in cooperative multi-agent reinforcement learning. arXiv preprint arXiv:2404.10728","DOI":"10.52202\/079017-2374"},{"issue":"1","key":"361_CR25","doi-asserted-by":"publisher","first-page":"2103305119","DOI":"10.1073\/pnas.2103305119","volume":"119","author":"E Farhi","year":"2022","unstructured":"Farhi E, Harrow AW (2022) Quantum approximate optimization algorithm. Proc Natl Acad Sci 119(1):2103305119. https:\/\/doi.org\/10.1073\/pnas.2103305119","journal-title":"Proc Natl Acad Sci"},{"key":"361_CR26","unstructured":"Taylor ME, Stone P (2023) Multi-agent reinforcement learning: Foundations and modern approaches. In: MIT Press"},{"key":"361_CR27","doi-asserted-by":"crossref","unstructured":"Lee C-S, Wang M-H, Tsai Y-L, Chang W-S, Reformat M, Acampora G, Kubota N (2020) Fml-based reinforcement learning agent with fuzzy ontology for human-robot cooperative edutainment. Int J Uncertain Fuzziness Knowl-Based Syst 28(06):1023\u20131060","DOI":"10.1142\/S0218488520500440"},{"key":"361_CR28","unstructured":"Meyer N, Ufrecht C, Periyasamy M, Scherer DD, Plinge A, Mutschler C (2024) A survey on quantum reinforcement learning. arXiv preprint arXiv:2211.03464v2"},{"issue":"7","key":"361_CR29","doi-asserted-by":"publisher","first-page":"8762","DOI":"10.1109\/TNNLS.2023.3236361","volume":"35","author":"J Hao","year":"2023","unstructured":"Hao J, Yang T, Tang H, Bai C, Liu J, Meng Z, Liu P, Wang Z (2023) Exploration in deep reinforcement learning: From single-agent to multiagent domain. IEEE Transactions on Neural Networks and Learning Systems 35(7):8762\u20138782","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"361_CR30","unstructured":"Zhao L, Qian Y, Liu D (2024) Quantum-assisted exploration in multi-agent reinforcement learning. Quantum AI Letters 2(1):45\u201360"},{"issue":"6","key":"361_CR31","doi-asserted-by":"publisher","first-page":"750","DOI":"10.1007\/s10458-019-09421-1","volume":"33","author":"P Hern\u00e1ndez-Leal","year":"2019","unstructured":"Hern\u00e1ndez-Leal P, Kartal B, Taylor ME (2019) A survey and critique of multiagent deep reinforcement learning. Auton Agent Multi-Agent Syst 33(6):750\u2013797","journal-title":"Auton Agent Multi-Agent Syst"},{"issue":"2","key":"361_CR32","doi-asserted-by":"publisher","first-page":"865","DOI":"10.1103\/RevModPhys.81.865","volume":"81","author":"R Horodecki","year":"2009","unstructured":"Horodecki R, Horodecki P, Horodecki M, Horodecki K (2009) Quantum entanglement. Rev Mod Phys 81(2):865","journal-title":"Rev Mod Phys"},{"key":"361_CR33","doi-asserted-by":"crossref","unstructured":"Azari MM et al (2024) Quantum multi-agent reinforcement learning is all you need: Coordinated global access in integrated tn\/ntn cube-satellite networks. IEEE Comm Magazine 62(10):106\u2013112","DOI":"10.1109\/MCOM.010.2400001"},{"key":"361_CR34","unstructured":"Benedetti M, Fingerhuth L (2022) Hybrid quantum-classical approaches for reinforcement learning. Quantum Mach Learn 4(1):15\u201328"},{"issue":"11","key":"361_CR35","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: A survey. The International Journal of Robotics Research 32(11):1238\u20131274","journal-title":"The International Journal of Robotics Research"},{"key":"361_CR36","unstructured":"Li Y, Chang Y (2023) Hybrid quantum-classical optimization for exploration-exploitation in reinforcement learning. Quantum Comput Appl 9(1):40\u201355"},{"issue":"4","key":"361_CR37","first-page":"331","volume":"61","author":"J Koller","year":"2021","unstructured":"Koller J, Smith L (2021) Exploration-exploitation in multi-agent reinforcement learning: A survey. J Artif Intell Res 61(4):331\u2013348","journal-title":"J Artif Intell Res"},{"key":"361_CR38","doi-asserted-by":"crossref","unstructured":"Acampora G, Cuzzocrea A, Lapegna M, Schiattarella R, Vitiello A (2025) Generalizing reinforcement learning-based quantum circuit synthesis across multiple topologies. In: 2025 IEEE Symposium for multidisciplinary computational intelligence incubators (MCII), pp 1\u20137. IEEE","DOI":"10.1109\/MCII64973.2025.11032620"},{"key":"361_CR39","unstructured":"Osband I, Blundell C, Pritzel A, Van Roy B (2016) Deep exploration via bootstrapped dqn. In: Advances in Neural Information Processing Systems (NeurIPS), vol 29. arXiv:1602.04621"},{"issue":"06","key":"361_CR40","doi-asserted-by":"publisher","first-page":"1023","DOI":"10.1142\/S0218488520500440","volume":"28","author":"C-S Lee","year":"2020","unstructured":"Lee C-S, Wang M-H, Tsai Y-L, Chang W-S, Reformat M, Acampora G, Kubota N (2020) Fml-based reinforcement learning agent with fuzzy ontology for human-robot cooperative edutainment. Internat J Uncertain Fuzziness Knowledge-Based Systems 28(06):1023\u20131060","journal-title":"Internat J Uncertain Fuzziness Knowledge-Based Systems"},{"key":"361_CR41","unstructured":"Hsu H-L et al (2024) Maximize to explore: One objective function fusing estimation, planning, and exploration. arXiv preprint arXiv:2305.18258"},{"issue":"1","key":"361_CR42","first-page":"40","volume":"9","author":"Y Li","year":"2023","unstructured":"Li Y, Chang Y (2023) Hybrid quantum-classical optimization for exploration-exploitation in reinforcement learning. Quantum Computing and Applications 9(1):40\u201355","journal-title":"Quantum Computing and Applications"},{"key":"361_CR43","unstructured":"Farhi E, Goldwasser S (2023) Quantum optimization methods for multi-agent systems. In: Proceedings of the IEEE International conference on quantum computing, 127\u2013135"},{"key":"361_CR44","unstructured":"Wang R, Wei X (2023) Exploring the quantum approximate optimization algorithm in reinforcement learning. Quantum Comput Res 7(2):132\u2013145"},{"issue":"6","key":"361_CR45","first-page":"103","volume":"55","author":"S Liu","year":"2022","unstructured":"Liu S, Zhang Z (2022) A survey on multi-agent q-learning: Algorithms and applications. Journal of Machine Learning 55(6):103\u2013120","journal-title":"Journal of Machine Learning"},{"key":"361_CR46","volume":"298","author":"Y Liu","year":"2024","unstructured":"Liu Y, Rodriguez A, Kim S (2024) Cooperative multi-uav surveillance systems for environmental monitoring: A comprehensive review. Remote Sens Environ 298:113801","journal-title":"Remote Sens Environ"},{"key":"361_CR47","doi-asserted-by":"crossref","unstructured":"K\u00f6lle M, Schneider K, Egger S, Topp F, Phan T, Altmann P, N\u00fcsslein J, Linnhoff-Popien C (2024) Architectural influence on variational quantum circuits in multi-agent reinforcement learning: Evolutionary strategies for optimization. arXiv preprint arXiv:2407.20739","DOI":"10.1007\/978-3-031-87327-0_3"},{"key":"361_CR48","doi-asserted-by":"publisher","first-page":"1028","DOI":"10.1109\/TASE.2024.3358894","volume":"22","author":"X Mao","year":"2024","unstructured":"Mao X, Wu G, Fan M, Cao Z, Pedrycz W (2024) Dl-drl: A double-level deep reinforcement learning approach for large-scale task scheduling of multi-uav. IEEE Trans Autom Sci Eng 22:1028\u20131044","journal-title":"IEEE Trans Autom Sci Eng"},{"key":"361_CR49","doi-asserted-by":"crossref","unstructured":"Zhu Y, Zheng Y, Wei W, Fang Z (2024) Enhancing automated maneuvering decisions in ucav air combat games using homotopy-based reinforcement learning. Drones (2504-446X), 8(12)","DOI":"10.3390\/drones8120756"},{"issue":"2","key":"361_CR50","first-page":"88","volume":"13","author":"T Mikami","year":"2023","unstructured":"Mikami T, Tsuchida M (2023) Quantum-classical hybrid systems: Challenges and opportunities. Quantum Computing Review 13(2):88\u2013102","journal-title":"Quantum Computing Review"},{"issue":"1","key":"361_CR51","first-page":"1","volume":"5","author":"R Mikami","year":"2023","unstructured":"Mikami R, Matsuura S, Mitarai K, Fujii K (2023) Variational quantum multi-agent reinforcement learning. Quantum Machine Intelligence 5(1):1\u201319","journal-title":"Quantum Machine Intelligence"},{"issue":"4","key":"361_CR52","first-page":"406","volume":"129","author":"RM Murray","year":"2007","unstructured":"Murray RM, Dunbar WB (2007) Cooperative control of multi-vehicle systems using cost graphs and optimization. J Dyn Syst Meas Contr 129(4):406\u2013414","journal-title":"J Dyn Syst Meas Contr"},{"key":"361_CR53","doi-asserted-by":"crossref","unstructured":"Venturelli D, Gustafson E, Kurkcuoglu D, Zorzetti S (2025) Near-term application engineering challenges in emerging superconducting qudit processors. arXiv preprint arXiv:2506.05608","DOI":"10.1109\/DSN-W65791.2025.00061"},{"issue":"1","key":"361_CR54","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1109\/JPROC.2006.887293","volume":"95","author":"R Olfati-Saber","year":"2007","unstructured":"Olfati-Saber R, Fax JA, Murray RM (2007) Consensus and cooperation in networked multi-agent systems. Proc IEEE 95(1):215\u2013233. https:\/\/doi.org\/10.1109\/JPROC.2006.887293","journal-title":"Proc IEEE"},{"key":"361_CR55","doi-asserted-by":"publisher","unstructured":"Zhou L, Wang S-T, Choi S, Pichler H, Lukin MD (2020) Quantum approximate optimization algorithm: Performance, mechanism, and implementation on near-term devices. Phys Rev X 10(2):021067. https:\/\/doi.org\/10.1103\/PhysRevX.10.021067","DOI":"10.1103\/PhysRevX.10.021067"},{"key":"361_CR56","unstructured":"Oord A, Li Y, Vinyals O (2018) Representation learning with contrastive predictive coding. In: Advances in Neural information processing systems (NeurIPS) (2018). arXiv:1807.03748"},{"key":"361_CR57","doi-asserted-by":"publisher","unstructured":"Olfati-Saber R, Fax JA, Murray RM (2007) Consensus and cooperation in networked multi-agent systems. In: Proceedings of the IEEE 95(1):215\u2013233. https:\/\/doi.org\/10.1109\/JPROC.2006.887293","DOI":"10.1109\/JPROC.2006.887293"},{"key":"361_CR58","doi-asserted-by":"publisher","unstructured":"Strogatz SH (2001) Exploring complex networks. Nature 410(6825):268\u2013276. https:\/\/doi.org\/10.1038\/35065725","DOI":"10.1038\/35065725"},{"key":"361_CR59","unstructured":"Graham RL, Rothschild BL, Spencer JH (1990) Ramsey Theory vol 121. John Wiley & Sons"},{"key":"361_CR60","doi-asserted-by":"publisher","first-page":"79","DOI":"10.22331\/q-2018-08-06-79","volume":"2","author":"J Preskill","year":"2018","unstructured":"Preskill J (2018) Quantum computing in the nisq era and beyond. Quantum 2:79","journal-title":"Quantum"},{"key":"361_CR61","unstructured":"Farghadani H, Mirzaei M (2023) Quantum-enhanced multi-agent reinforcement learning. Int J Quantum Inf 21(2):250\u2013265"},{"key":"361_CR62","unstructured":"Zhang L, Chen M, Wang X (2023) Deep reinforcement learning for wildfire monitoring and suppression using uav swarms. IEEE Transactions on aerospace and electronic systems 59(3):2847\u20132859"},{"issue":"3","key":"361_CR63","volume":"7","author":"P Rebentrost","year":"2022","unstructured":"Rebentrost P, Braun D (2022) Quantum approximate optimization algorithms: A review. Quantum Science and Technology 7(3):024005","journal-title":"Quantum Science and Technology"},{"issue":"19","key":"361_CR64","doi-asserted-by":"publisher","first-page":"70","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023","volume":"2017","author":"AE Sallab","year":"2017","unstructured":"Sallab AE, Abdou M, Perot E, Yogamani S (2017) Deep reinforcement learning framework for autonomous driving. Electronic Imaging 2017(19):70\u201376","journal-title":"Electronic Imaging"},{"key":"361_CR65","doi-asserted-by":"crossref","unstructured":"Chung S-J, Paranjape AA, Dames P, Shen S, Kumar V (2018) A survey on aerial swarm robotics. IEEE Trans Robot 34(4):837\u2013855","DOI":"10.1109\/TRO.2018.2857475"},{"key":"361_CR66","unstructured":"Murray RM, Dunbar WB (2007) Cooperative control of multi-vehicle systems using cost graphs and optimization. J Dynamic Syst Measure Control 129(4):406\u2013414"},{"key":"361_CR67","unstructured":"Wei Q, Thompson J, Patel N (2020) Cooperative multi-agent reinforcement learning for forest fire monitoring. In: Proceedings of the international conference on autonomous agents and multiagent systems, pp 1456\u20131464. ACM"},{"issue":"6825","key":"361_CR68","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1038\/35065725","volume":"410","author":"SH Strogatz","year":"2001","unstructured":"Strogatz SH (2001) Exploring complex networks. Nature 410(6825):268\u2013276. https:\/\/doi.org\/10.1038\/35065725","journal-title":"Nature"},{"key":"361_CR69","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) Mujoco: A physics engine for model-based control. In: 2012 IEEE\/RSJ International conference on intelligent robots and systems, pp 5026\u20135033. IEEE","DOI":"10.1109\/IROS.2012.6386109"},{"key":"361_CR70","unstructured":"Coumans E, Bai Y (2021) Pybullet, a python module for physics simulation for games, robotics and machine learning. http:\/\/pybullet.org"},{"key":"361_CR71","unstructured":"Chalumeau F, Lim B, Boige R, Grillotti L, Flageat M, Mac\u2019e V, Pierrot T, Cully A (2023) Qdax: A library for quality-diversity and population-based algorithms with hardware acceleration. J Open Source Softw 8(86):5149"},{"key":"361_CR72","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) OpenAI Gym"},{"key":"361_CR73","doi-asserted-by":"crossref","unstructured":"Samvelyan M, Rashid T, Witt C, Farquhar G, Nardelli N, Rudner TGJ, Torr PHS, Whiteson S, Foerster J (2019) The starcraft multi-agent challenge. In: Proceedings of the 18th International conference on autonomous agents and multiagent systems (AAMAS), pp 2186\u20132188. International Foundation for Autonomous Agents and Multiagent Systems. arXiv:1902.04043","DOI":"10.65109\/LVZZ5205"},{"key":"361_CR74","unstructured":"Papoudakis G, Christianos F, Sch\u00e4fer L, Albrecht SV (2021) Benchmarking multi-agent deep reinforcement learning algorithms in cooperative tasks. arXiv preprint arXiv:2006.07869"},{"issue":"3\/4","key":"361_CR75","doi-asserted-by":"publisher","first-page":"285","DOI":"10.2307\/2332286","volume":"25","author":"WR Thompson","year":"1933","unstructured":"Thompson WR (1933) On the likelihood that one unknown probability exceeds another in view of the evidence of two samples. Biometrika 25(3\/4):285\u2013294","journal-title":"Biometrika"},{"key":"361_CR76","unstructured":"Lowe R, Wu Y, Tamar A, Harb J, Abbeel P, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in neural information processing Systems (NeurIPS), pp 6379\u20136390"},{"key":"361_CR77","unstructured":"Yu C, Czarnecki WM, Omidshafiei S, Leibo JZ, Hessel M, Munos R, Dabney W, Kim H (2021) The surprising effectiveness of ppo in cooperative multi-agent games. In: arXiv Preprint arXiv:2103.01955"},{"issue":"1\u20134","key":"361_CR78","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1016\/S0020-0255(99)00101-2","volume":"124","author":"D Ventura","year":"2000","unstructured":"Ventura D, Martinez T (2000) Quantum associative memory. Inf Sci 124(1\u20134):273\u2013296","journal-title":"Inf Sci"},{"key":"361_CR79","unstructured":"Wang T, Zhang H, Dong H, Zhang C (2020) Hierarchical multi-agent reinforcement learning with skill discovery. In: Advances in neural information processing systems (NeurIPS), vol 33, pp 9008\u20139020"},{"issue":"4","key":"361_CR80","first-page":"112","volume":"21","author":"M Vince","year":"2022","unstructured":"Vince M, Kumar A (2022) Quantum annealing for multi-agent systems: A review. Quantum Inf Process 21(4):112\u2013127","journal-title":"Quantum Inf Process"},{"key":"361_CR81","unstructured":"Wang H, Lanctot M, Wang Y, Mao W, Zhang C (2021) Qplex: Duplex dueling multi-agent q-learning. In: International conference on learning representations (ICLR)"},{"key":"361_CR82","doi-asserted-by":"crossref","unstructured":"Kurach K, Raichuk A, Stanczyk P, Zajac M, Espeholt L, Marecki J, Michalski M, Bachem O, Brock A, Buesing L et al (2020) Google research football: A novel reinforcement learning environment. arXiv preprint arXiv:1907.11180","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"361_CR83","unstructured":"Sitzmann V, Martel JNP, Bergman AW, Lindell DB, Wetzstein G (2020) Implicit neural representations with periodic activation functions. In: NeurIPS"},{"issue":"2","key":"361_CR84","first-page":"132","volume":"7","author":"R Wang","year":"2023","unstructured":"Wang R, Wei X (2023) Exploring the quantum approximate optimization algorithm in reinforcement learning. Quantum Computing Research 7(2):132\u2013145","journal-title":"Quantum Computing Research"},{"key":"361_CR85","first-page":"95000","volume":"10","author":"J Wang","year":"2022","unstructured":"Wang J, Chen X, Huang Q (2022) Review of quantum-enhanced learning for multi-agent systems. IEEE Access 10:95000\u201395015","journal-title":"IEEE Access"},{"issue":"3","key":"361_CR86","doi-asserted-by":"publisher","first-page":"1021","DOI":"10.1109\/TNNLS.2021.3087969","volume":"33","author":"Z Wang","year":"2022","unstructured":"Wang Z, Zhang H, Li X (2022) Hybrid quantum-classical algorithms for multi-agent reinforcement learning. IEEE Transactions on Neural Networks and Learning Systems 33(3):1021\u20131034. https:\/\/doi.org\/10.1109\/TNNLS.2021.3087969","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"361_CR87","doi-asserted-by":"crossref","unstructured":"Henderson P, Islam R, Bachman P et al (2018) Deep reinforcement learning that matters. In: AAAI","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"361_CR88","unstructured":"Agarwal R, Machado MC, Castro P, Bellemare MG (2021) Deep reinforcement learning at the edge of the statistical precipice. In: NeurIPS"},{"issue":"3","key":"361_CR89","first-page":"2847","volume":"59","author":"L Zhang","year":"2023","unstructured":"Zhang L, Chen M, Wang X (2023) Deep reinforcement learning for wildfire monitoring and suppression using uav swarms. IEEE Trans Aerosp Electron Syst 59(3):2847\u20132859","journal-title":"IEEE Trans Aerosp Electron Syst"},{"issue":"1","key":"361_CR90","first-page":"45","volume":"2","author":"L Zhao","year":"2024","unstructured":"Zhao L, Qian Y, Liu D (2024) Quantum-assisted exploration in multi-agent reinforcement learning. Quantum AI Letters 2(1):45\u201360","journal-title":"Quantum AI Letters"},{"issue":"2","key":"361_CR91","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevX.10.021067","volume":"10","author":"L Zhou","year":"2020","unstructured":"Zhou L, Wang S-T, Choi S, Pichler H, Lukin MD (2020) Quantum approximate optimization algorithm: Performance, mechanism, and implementation on near-term devices. Phys Rev X 10(2):021067. https:\/\/doi.org\/10.1103\/PhysRevX.10.021067","journal-title":"Phys Rev X"},{"key":"361_CR92","doi-asserted-by":"publisher","unstructured":"Preskill J (2018) Quantum computing in the nisq era and beyond. Quantum, 2:79. https:\/\/doi.org\/10.22331\/q-2018-08-06-79","DOI":"10.22331\/q-2018-08-06-79"}],"container-title":["Quantum Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42484-026-00361-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42484-026-00361-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42484-026-00361-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T06:51:02Z","timestamp":1770101462000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42484-026-00361-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,3]]},"references-count":92,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["361"],"URL":"https:\/\/doi.org\/10.1007\/s42484-026-00361-0","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-7111581\/v1","asserted-by":"object"}]},"ISSN":["2524-4906","2524-4914"],"issn-type":[{"value":"2524-4906","type":"print"},{"value":"2524-4914","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,3]]},"assertion":[{"value":"13 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest\/Competing Interests"}},{"value":"not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Clinical Trial Number"}},{"value":"The code\/data is available in the\n                      \n                      .","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code Availability"}},{"value":"The authors declare no competing interests.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"7"}}