{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T19:13:52Z","timestamp":1776712432511,"version":"3.51.2"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation Project","doi-asserted-by":"publisher","award":["U1909204"],"award-info":[{"award-number":["U1909204"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"China National Railway Group Company Science and Technology Research and Development Program Project","award":["L2022X002"],"award-info":[{"award-number":["L2022X002"]}]},{"name":"Open Topic of the National Railway Intelligent Transportation System Engineering Technology Research Center","award":["RITS2021KF03"],"award-info":[{"award-number":["RITS2021KF03"]}]},{"name":"Guangdong Provincial Key Area Research and Development Program Project","award":["2020B0909050001"],"award-info":[{"award-number":["2020B0909050001"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/access.2023.3345448","type":"journal-article","created":{"date-parts":[[2023,12,21]],"date-time":"2023-12-21T19:56:57Z","timestamp":1703188617000},"page":"145085-145100","source":"Crossref","is-referenced-by-count":9,"title":["Network-Wide Traffic Signal Control Based on MARL With Hierarchical Nash-Stackelberg Game Model"],"prefix":"10.1109","volume":"11","author":[{"given":"Hui","family":"Shen","sequence":"first","affiliation":[{"name":"School of Electrical and Control Engineering, North China University of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongxia","family":"Zhao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0574-7464","authenticated-orcid":false,"given":"Zundong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing Key Laboratory of Urban Road Traffic Intelligent Technology, North China University of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xun","family":"Yang","sequence":"additional","affiliation":[{"name":"Beijing Key Laboratory of Urban Road Traffic Intelligent Technology, North China University of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yutong","family":"Song","sequence":"additional","affiliation":[{"name":"Beijing Key Laboratory of Urban Road Traffic Intelligent Technology, North China University of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoming","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing Key Laboratory of Urban Road Traffic Intelligent Technology, North China University of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-008-9062-9"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2017.10.016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.matcom.2019.06.002"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3018267"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3390\/s19102282"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3182\/20080706-5-KR-1001.01213"},{"issue":"17","key":"ref7","first-page":"4","article-title":"Traffic signal coordination control for two adjacent intersections based on NashCC-Q learning","volume":"20","author":"Zhao","year":"2008","journal-title":"Int. J. Simul. Model"},{"issue":"2","key":"ref8","first-page":"148","article-title":"A distributed approach for coordination between traffic lights based on game theory","volume":"2","author":"Shamshirband","year":"2012","journal-title":"Int. Arab. J. Inf. Techn."},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2015.04.009"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8917165"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3140511"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5744"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2020.2990189"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3390\/s20154291"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2020.104525"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1155\/2021\/6693636"},{"issue":"129","key":"ref17","first-page":"5614","article-title":"On the approximation of cooperative heterogeneous multi-agent reinforcement learning (MARL) using mean field control (MFC)","volume":"23","author":"Mondal","year":"2022","journal-title":"J. Mach. Learn. Res."},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1103\/PRXEnergy.1.033005"},{"issue":"8","key":"ref19","first-page":"1","article-title":"Overview on multi-agent reinforcement learning","volume":"46","author":"Du","year":"2019","journal-title":"Comput. Sci."},{"issue":"3","key":"ref20","first-page":"23","article-title":"Reinforcement learning technology in multi-agent system","volume":"31","author":"Zhao","year":"2004","journal-title":"Comput. Sci."},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14435-6_7"},{"key":"ref22","first-page":"121","article-title":"Distributed problem solving and planning","volume-title":"Multiagent Systems: A Modern Approach to Distributed Artificial Intelligence","author":"Weiss","year":"2000"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21236\/ada333248"},{"issue":"3","key":"ref24","first-page":"1613","article-title":"Multi-agent reinforcement learning based traffic signal control for integrated urban network: Survey of state of art","volume":"35","author":"Yang","year":"2018","journal-title":"Appl. Res. Comput."},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3319619.3321894"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2019.103216"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"ref29","first-page":"264","article-title":"Smarts: An open-source scalable multi-agent RL training school for autonomous driving","volume-title":"Proc. Mach. Learn. Res.","volume":"155","author":"Zhou"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461113"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2977374"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2241057"},{"key":"ref35","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang"},{"key":"ref36","first-page":"1626","article-title":"Finite-time analysis of distributed TD(0) with linear function approximation on multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Doan"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09996-w"},{"key":"ref38","first-page":"242","article-title":"Multi agent reinforcement learning: Theoretical framework and an algorithm","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hu"},{"key":"ref39","first-page":"242","article-title":"Correlated Q-learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"3","author":"Greenwald"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/iat.2003.1241094"},{"key":"ref41","first-page":"322","article-title":"Friend-or-foe Q-learning in general-sum games","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"1","author":"Littman"},{"key":"ref42","first-page":"1395","article-title":"Combining independent and joint learning: A negotiation based approach","volume-title":"Proc. 11th Int. Conf. Auton. Agent. Multiagent Agent. Syst.","volume":"3","author":"Bianchi"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0222215"},{"key":"ref44","first-page":"2961","article-title":"Actor-attention-critic for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Iqbal"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1155\/2013\/962869"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3225431"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/s13369-017-3018-9"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3148706"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/10005208\/10367982.pdf?arnumber=10367982","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T23:45:53Z","timestamp":1705103153000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10367982\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/access.2023.3345448","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]}}}