{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T05:28:34Z","timestamp":1743830914869,"version":"3.37.3"},"reference-count":36,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/OAPA.html"}],"funder":[{"DOI":"10.13039\/501100007129","name":"Natural Science Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ZR2017PF005"],"award-info":[{"award-number":["ZR2017PF005"]}],"id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61873138","61803218","61573353","61533017","61573205"],"award-info":[{"award-number":["61873138","61803218","61573353","61533017","61573205"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2018]]},"DOI":"10.1109\/access.2018.2878853","type":"journal-article","created":{"date-parts":[[2018,10,31]],"date-time":"2018-10-31T19:04:19Z","timestamp":1541012659000},"page":"70223-70235","source":"Crossref","is-referenced-by-count":9,"title":["A Gradient-Based Reinforcement Learning Algorithm for Multiple Cooperative Agents"],"prefix":"10.1109","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6615-629X","authenticated-orcid":false,"given":"Zhen","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Automation, Qingdao University, Qingdao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongqing","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Automation, Qingdao University, Qingdao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiaoni","family":"Han","sequence":"additional","affiliation":[{"name":"School of Automation, Qingdao University, Qingdao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tingting","family":"Song","sequence":"additional","affiliation":[{"name":"School of Automation, Qingdao University, Qingdao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2854283"},{"key":"ref32","first-page":"1789","article-title":"Collaborative multiagent reinforcement learning by payoff propagation","volume":"7","author":"kok","year":"2006","journal-title":"J Mach Learn Res"},{"key":"ref31","first-page":"1041","article-title":"Preprocessing techniques for accelerating the DCOP algorithm ADOPT","author":"syed","year":"2005","journal-title":"Proc AAMAS"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-015-9447-5"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2614002"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2016.2597763"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/S1874-1029(13)60031-2"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2011.2166091"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2011.09.005"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"article-title":"On-line Q-learning using connectionist systems","year":"1994","author":"rummery","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2012.2218595"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2009.2037462"},{"key":"ref16","first-page":"878","article-title":"A multi-agent reinforcement learning using actor-critic methods","author":"li","year":"2008","journal-title":"Proc Int Conf Mach Learn Cybern"},{"key":"ref17","first-page":"541","article-title":"Nash convergence of gradient dynamics in general-sum games","author":"singh","year":"2000","journal-title":"Proc UAI"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(02)00121-2"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368173"},{"key":"ref28","first-page":"2635","article-title":"Multi-agent reinforcement learning in common interest and fixed sum stochastic games: An experimental study","volume":"9","author":"bab","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2015.2488680"},{"key":"ref27","first-page":"322","article-title":"Friend-or-foe Q-learning in general-sum games","author":"littman","year":"2001","journal-title":"Proc ICML"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2012.05.074"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2016.2645699"},{"key":"ref29","first-page":"4878","article-title":"Multi-agent Q-learning with joint state value approximation","author":"chen","year":"2011","journal-title":"Proc CCC"},{"key":"ref5","first-page":"1039","article-title":"Nash Q-learning for general-sum stochastic games","volume":"4","author":"hu","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0181747"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2014.7004682"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"ref9","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in Neural Information Processing Systems 12"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1017\/S0269888912000057"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1017\/S026988890500041X"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1613\/jair.4818"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2007.01.004"},{"key":"ref24","article-title":"Q-learning in two-player two-action games","author":"babes","year":"2009","journal-title":"Proc AAMAS"},{"key":"ref23","first-page":"1145","article-title":"Dynamics of Boltzmann Q learning in two-player two-action games","volume":"85","author":"kianercy","year":"2012","journal-title":"Phys Rev E Stat Phys Plasmas Fluids Relat Interdiscip Top"},{"key":"ref26","first-page":"242","article-title":"Correlated-Q learning","author":"greenwald","year":"2003","journal-title":"Proc ICML"},{"key":"ref25","first-page":"840","article-title":"FMR-GA&#x2014;A cooperative multi-agent reinforcement learning algorithm based on gradient ascent","author":"zhang","year":"2017","journal-title":"Proc ICONIP"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8274985\/08517104.pdf?arnumber=8517104","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,14]],"date-time":"2025-01-14T20:01:58Z","timestamp":1736884918000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8517104\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/access.2018.2878853","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2018]]}}}