{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T00:12:02Z","timestamp":1775866322890,"version":"3.50.1"},"reference-count":116,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Fundamental Research Grant Scheme","award":["FRGS\/1\/2019\/ICT03\/SYUC\/01\/1"],"award-info":[{"award-number":["FRGS\/1\/2019\/ICT03\/SYUC\/01\/1"]}]},{"name":"Partnership Grant between Sunway University and University of Malaya","award":["CR-UMSSTDCIS-2018-01 and RK004-2017"],"award-info":[{"award-number":["CR-UMSSTDCIS-2018-01 and RK004-2017"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/access.2020.3034141","type":"journal-article","created":{"date-parts":[[2020,10,27]],"date-time":"2020-10-27T19:48:59Z","timestamp":1603828139000},"page":"208016-208044","source":"Crossref","is-referenced-by-count":140,"title":["Deep Reinforcement Learning for Traffic Signal Control: A Review"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8908-8883","authenticated-orcid":false,"given":"Faizan","family":"Rasheed","sequence":"first","affiliation":[{"name":"Department of Computing and Information Systems, Sunway University, Subang Jaya, Malaysia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3110-2782","authenticated-orcid":false,"given":"Kok-Lim Alvin","family":"Yau","sequence":"additional","affiliation":[{"name":"Department of Computing and Information Systems, Sunway University, Subang Jaya, Malaysia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6266-2390","authenticated-orcid":false,"given":"Rafidah Md.","family":"Noor","sequence":"additional","affiliation":[{"name":"Faculty of Computer Science and Information Technology, University of Malaya, Kuala Lumpur, Malaysia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6853-5878","authenticated-orcid":false,"given":"Celimuge","family":"Wu","sequence":"additional","affiliation":[{"name":"Graduate School of Informatics and Engineering, The University of Electro-Communications, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3450-2538","authenticated-orcid":false,"given":"Yeh-Ching","family":"Low","sequence":"additional","affiliation":[{"name":"Department of Computing and Information Systems, Sunway University, Subang Jaya, Malaysia"}]}],"member":"263","reference":[{"key":"ref39","volume":"8","author":"zurada","year":"1992","journal-title":"Introduction to Artificial Neural Systems"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2011.2161577"},{"key":"ref32","first-page":"105","article-title":"A survey of intelligence methods in urban traffic signal control","volume":"7","author":"liu","year":"2007","journal-title":"Int J Comput Sci Netw Secur"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-25808-9_4"},{"key":"ref30","article-title":"A survey on traffic signal control methods","author":"wei","year":"2019","journal-title":"arXiv 1904 08117"},{"key":"ref37","volume":"1","author":"bengio","year":"2017","journal-title":"Deep Learning"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2929020"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2018.2815678"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2008.4732718"},{"key":"ref27","first-page":"4","article-title":"Time optimization for traffic signal control using genetic algorithm","volume":"2","author":"singh","year":"2009","journal-title":"Int J Recent Trends Eng"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-008-9062-9"},{"key":"ref20","article-title":"A deep reinforcement learning framework for the financial portfolio management problem","author":"jiang","year":"2017","journal-title":"arXiv 1706 10059"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.17775\/CSEEJPES.2018.00520"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ADCONIP.2017.7983780"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.128"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1016\/S0965-8564(97)00048-7"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICSMC.2001.973106"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1080\/01431160802549278"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref50","volume":"135","author":"sutton","year":"1998","journal-title":"Introduction to Reinforcement Learning"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(99)01331-5"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1155\/2018\/2129393"},{"key":"ref58","first-page":"348","article-title":"Experience replay for continual learning","author":"rolnick","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref57","article-title":"Deep reinforcement learning for cyber security","author":"thi nguyen","year":"2019","journal-title":"arXiv 1906 05799"},{"key":"ref56","first-page":"3200","article-title":"Deep reinforcement learning that matters","author":"henderson","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2897134"},{"key":"ref54","article-title":"Deep reinforcement learning: An overview","author":"li","year":"2017","journal-title":"arXiv 1701 07274"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref52","volume":"135","author":"sutton","year":"1998","journal-title":"Introduction to Reinforcement Learning"},{"key":"ref40","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.71.026125"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/S0191-2615(01)00006-6"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-5113-5_3"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2015.0108"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICNC.2009.374"},{"key":"ref49","first-page":"2661","article-title":"Safe reinforcement learning via shielding","author":"alshiekh","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2013.2255286"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2012.6338911"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref45","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","author":"nachum","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref48","article-title":"Reinforcement learning from imperfect demonstrations","author":"gao","year":"2018","journal-title":"arXiv 1802 05313"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref42","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref41","author":"haykin","year":"1994","journal-title":"Neural Networks A Comprehensive Foundation"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2019.02.006"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.2200\/S00268ED1V01Y201005AIM009"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.3390\/e21080744"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1115\/DSCC2019-9076"},{"key":"ref71","article-title":"Deep reinforcement learning for traffic light control in vehicular networks","author":"liang","year":"2018","journal-title":"arXiv 1803 11115"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2018.5170"},{"key":"ref76","article-title":"Deep reinforcement learning for coordination in traffic light control","author":"van der pol","year":"2016"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1016\/j.trip.2019.100020"},{"key":"ref74","article-title":"Coordinated deep reinforcement learners for traffic light control","author":"van der pol","year":"2016","journal-title":"Proc Learn Inference Control Multi-Agent Syst (NIPS)"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2020.03.065"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220096"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2901791"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/NICOInt.2019.00034"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICCSNT47585.2019.8962496"},{"key":"ref61","article-title":"Analysis and improvement of adversarial training in DQN agents with adversarially-guided exploration (AGE)","author":"behzadan","year":"2019","journal-title":"arXiv 1906 01119"},{"key":"ref63","article-title":"Data efficient training for reinforcement learning with adaptive behavior policy sharing","author":"liu","year":"2020","journal-title":"arXiv 2002 05229"},{"key":"ref64","article-title":"Deep reinforcement learning with averaged target DQN","author":"anschel","year":"2016","journal-title":"arXiv 1611 01929"},{"key":"ref65","article-title":"An overview of gradient descent optimization algorithms","author":"ruder","year":"2016","journal-title":"arXiv 1609 04747"},{"key":"ref66","article-title":"Using a deep reinforcement learning agent for traffic signal control","author":"genders","year":"2016","journal-title":"arXiv 1611 01142"},{"key":"ref67","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1109\/JAS.2016.7508798","article-title":"Traffic signal timing via deep reinforcement learning","volume":"3","author":"li","year":"2016","journal-title":"IEEE\/CAA Journal of Automatica Sinica"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2017.0153"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0968-090X(00)00047-4"},{"key":"ref69","article-title":"Adaptive traffic signal control: Deep reinforcement learning algorithm with experience replay and target network","author":"gao","year":"2017","journal-title":"arXiv 1705 02755"},{"key":"ref1","article-title":"Traffic signal control system","author":"molloy","year":"1973"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1007\/0-387-24109-4_3"},{"key":"ref95","first-page":"2137","article-title":"Learning to communicate with deep multi-agent reinforcement learning","author":"foerster","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref108","first-page":"1","article-title":"VISSIM: A microscopic simulation tool to evaluate actuated signal control including bus priority","volume":"32","author":"fellendorf","year":"1994","journal-title":"Proc 64th Inst Transp Eng Annu Meet"},{"key":"ref94","article-title":"CM3: Cooperative multi-goal multi-stage multi-agent reinforcement learning","author":"yang","year":"2018","journal-title":"arXiv 1809 05188"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-6142-6_2"},{"key":"ref93","article-title":"Mean field multi-agent reinforcement learning","author":"yang","year":"2018","journal-title":"arXiv 1802 05438"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1049\/ic:19950249"},{"key":"ref92","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","author":"zhang","year":"2018","journal-title":"arXiv 1802 08757"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1007\/BF00128098"},{"key":"ref91","first-page":"242","article-title":"Multiagent reinforcement learning: Theoretical framework and an algorithm","volume":"98","author":"hu","year":"1998","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref104","article-title":"SUMO&#x2013;simulation of urban mobility: An overview","author":"behrisch","year":"2011","journal-title":"Proc SIMUL 3rd Int Conf Adv Syst Simulat"},{"key":"ref90","article-title":"Microscopic modeling of traffic flow: Investigation of collision free vehicle dynamics","author":"krau\u00df","year":"1998"},{"key":"ref103","first-page":"183","article-title":"SUMO (Simulation of Urban MObility)-an open-source traffic simulation","author":"krajzewicz","year":"2002","journal-title":"Proc 4th Middle East Symp Simulation Modeling"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1016\/S0968-090X(02)00026-8"},{"key":"ref111","first-page":"128","article-title":"On the message and time complexity of a distributed mobility-adaptive clustering algorithm in wireless ad hoc networks","author":"bettstetter","year":"2002","journal-title":"Proc 4th Eur Wireless Conf"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2011.6083107"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-6142-6_5"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2014.02.001"},{"key":"ref99","first-page":"42","article-title":"A deep reinforcement learning approach to adaptive traffic lights management","author":"vidali","year":"2019","journal-title":"Proc Workshop &#x2018;From Objects Agents&#x2019;"},{"key":"ref96","article-title":"Prioritized experience replay","author":"schaul","year":"2015","journal-title":"arXiv 1511 05952"},{"key":"ref97","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","article-title":"A comprehensive survey of multiagent reinforcement learning","volume":"38","author":"bu","year":"2008","journal-title":"IEEE Trans Syst Man Cybern C (Appl Rev )"},{"key":"ref10","first-page":"1","article-title":"Decentralized learning for traffic signal control","author":"j","year":"2015","journal-title":"Proc of Int Conf on Comm Sys and Net (COMSNETS)"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2012.2209904"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)0733-947X(2003)129:3(278)"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3068287"},{"key":"ref14","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv 1312 5602"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1080\/01441649308716854"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1051\/jp1:1992277"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CCAA.2017.8229841"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/T-VT.1980.23833"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-018-0316-z"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/25.69966"},{"key":"ref114","article-title":"Parameter sharing deep deterministic policy gradient for cooperative multi-agent reinforcement learning","author":"chu","year":"2017","journal-title":"arXiv 1710 00336"},{"key":"ref113","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref116","article-title":"DQN with model-based exploration: Efficient learning on environments with sparse rewards","author":"zhen gou","year":"2019","journal-title":"arXiv 1903 09295"},{"key":"ref80","author":"luttinen","year":"1996","journal-title":"Statistical Analysis of Vehicle Time Headways"},{"key":"ref115","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref89","first-page":"563","article-title":"Approximate solutions for factored Dec-POMDPs with many agents","author":"oliehoek","year":"2013","journal-title":"Proc AAMAS"},{"key":"ref85","article-title":"Deep reinforcement learning with double q-learning","author":"van hasselt","year":"2015","journal-title":"arXiv 1509 06461 [cs]"},{"key":"ref86","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2015","journal-title":"arXiv 1511 06581"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref88","first-page":"1","article-title":"Using the max-plus algorithm for multiagent decision making in coordination graphs","author":"kok","year":"2005","journal-title":"Robot Soccer World Cup"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8948470\/09241006.pdf?arnumber=9241006","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,8]],"date-time":"2022-09-08T19:45:47Z","timestamp":1662666347000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9241006\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":116,"URL":"https:\/\/doi.org\/10.1109\/access.2020.3034141","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}