{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T17:19:29Z","timestamp":1775150369404,"version":"3.50.1"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. 
Contr."],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1109\/tac.2021.3121228","type":"journal-article","created":{"date-parts":[[2021,10,26]],"date-time":"2021-10-26T08:59:56Z","timestamp":1635238796000},"page":"5230-5245","source":"Crossref","is-referenced-by-count":25,"title":["Decentralized Learning for Optimality in Stochastic Dynamic Teams and Games With Local Control and Global State Information"],"prefix":"10.1109","volume":"67","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2579-4498","authenticated-orcid":false,"given":"Bora","family":"Yongacoglu","sequence":"first","affiliation":[{"name":"Department of Mathematics and Statistics, Queen’s University, Kingston, ON, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8295-1509","authenticated-orcid":false,"given":"Gurdal","family":"Arslan","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, University of Hawaii, Honolulu, HI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6099-5001","authenticated-orcid":false,"given":"Serdar","family":"Yuksel","sequence":"additional","affiliation":[{"name":"Department of Mathematics and Statistics, Queen’s University, Kingston, ON, Canada"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9030158"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993306"},{"key":"ref3","article-title":"Learning from delayed rewards","author":"Watkins","year":"1989"},{"issue":"3","key":"ref4","first-page":"279","article-title":"Q-learning","volume-title":"Mach. 
Learn.","volume":"8","author":"Watkins","year":"1992"},{"key":"ref5","article-title":"A survey of learning in multiagent environments: Dealing with non-stationarity","author":"Hernandez-Leal","year":"2017"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/BF01206560"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2013.05.025"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.32917\/hmj\/1206139508"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1002\/9781118453988.ch14"},{"key":"ref10","volume-title":"Game Theory","author":"Fudenberg","year":"1991"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1980.11718"},{"key":"ref13","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4614-7085-4","volume-title":"Stochastic Networked Control Systems","author":"Yüksel","year":"2013"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/0899-8256(89)90003-1"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.geb.2004.09.012"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2598476"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref18","first-page":"426","article-title":"Learning to coordinate without sharing information","volume-title":"Proc. 12th Nat. Conf. Artif. Intell.","author":"Sen","year":"1994"},{"key":"ref19","first-page":"746","article-title":"The dynamics of reinforcement learning in cooperative multiagent systems","volume-title":"Proc. 10th Innov. Appl. Artif. Intell. Conf.","author":"Claus","year":"1998"},{"key":"ref20","first-page":"310","article-title":"A generalized reinforcement-learning model: Convergence and applications","volume-title":"Proc. Int. Conf. Mach. 
Learn.","volume":"96","author":"Littman","year":"1996"},{"key":"ref21","first-page":"1039","article-title":"Nash Q-learning for general-sum stochastic games","volume":"4","author":"Hu","year":"2003","journal-title":"J. Mach. Learn. Res."},{"key":"ref22","first-page":"242","article-title":"Multiagent reinforcement learning: Theoretical framework and an algorithm","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"98","author":"Hu","year":"1998"},{"key":"ref23","first-page":"322","article-title":"Friend-or-foe Q-learning in general-sum games","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"1","author":"Littman","year":"2001"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/S1389-0417(01)00015-8"},{"key":"ref25","first-page":"242","article-title":"Correlated Q-learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"20","author":"Greenwald","year":"2003"},{"key":"ref26","first-page":"871","article-title":"Extending Q-learning to general adaptive multi-agent systems","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Tesauro","year":"2004"},{"key":"ref27","first-page":"1603","article-title":"Reinforcement learning to play an optimal Nash equilibrium in team Markov games","volume-title":"Proc. 15th Int. Conf. Neural Inf. Process. Syst.","author":"Wang","year":"2002"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.2307\/2951778"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1017\/S0269888912000057"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21236\/ada333248"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2017.12.001"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref33","first-page":"535","article-title":"An algorithm for distributed reinforcement learning in cooperative multi-agent systems","volume-title":"Proc. 17th Int. Conf. Mach. 
Learn.","author":"Lauer","year":"2000"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(02)00121-2"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2007.4399095"},{"key":"ref36","first-page":"326","article-title":"Reinforcement learning of coordination in cooperative multi-agent systems","volume-title":"Proc. AAAI\/IAAI","volume":"2002","author":"Kapetanakis","year":"2002"},{"key":"ref37","article-title":"Coordination of independent learners in cooperative Markov games","author":"Matignon","year":"2009"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/1160633.1160776"},{"issue":"1","key":"ref39","first-page":"2914","article-title":"Lenient learning in independent-learner stochastic cooperative games","volume":"17","author":"Wei","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1137\/110852462"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1137\/070680199"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1137\/110850694"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.geb.2012.03.006"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.geb.2012.02.017"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.geb.2004.01.004"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-010-9396-9"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/VETECS.2010.5493950"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/PIMRC.2016.7794793"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2916155"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2016.2539923"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-0729-0"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007678930559"},{"key":"ref53","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with 
networked agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2018"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6911(97)90015-3"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1515\/9780691214252"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-16170-4_12"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1137\/1101029"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9\/9903918\/09580732.pdf?arnumber=9580732","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T23:58:58Z","timestamp":1705017538000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9580732\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10]]},"references-count":57,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tac.2021.3121228","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"value":"0018-9286","type":"print"},{"value":"1558-2523","type":"electronic"},{"value":"2334-3303","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10]]}}}