{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T02:44:02Z","timestamp":1775011442517,"version":"3.50.1"},"reference-count":29,"publisher":"Informa UK Limited","issue":"1","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["617630 28"],"award-info":[{"award-number":["617630 28"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Innovation Star\u201d Project for Outstanding Graduate Students in Gansu Province","award":["2021CXZX-515"],"award-info":[{"award-number":["2021CXZX-515"]}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Journal of Control and Decision"],"published-print":{"date-parts":[[2025,1,2]]},"DOI":"10.1080\/23307706.2023.2195408","type":"journal-article","created":{"date-parts":[[2023,5,18]],"date-time":"2023-05-18T11:47:38Z","timestamp":1684410458000},"page":"81-92","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":5,"title":["Multi-agent deep reinforcement learning with traffic flow for traffic signal control"],"prefix":"10.1080","volume":"12","author":[{"given":"Liang","family":"Hou","sequence":"first","affiliation":[{"name":"College of Computer and Communication, Lanzhou University of Technology, Lanzhou, People\u2019s Republic of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0750-137X","authenticated-orcid":false,"given":"Dailin","family":"Huang","sequence":"additional","affiliation":[{"name":"College of Computer and Communication, Lanzhou University of Technology, Lanzhou, People\u2019s Republic of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Cao","sequence":"additional","affiliation":[{"name":"College of Computer and Communication, Lanzhou University of Technology, Lanzhou, People\u2019s Republic of China"},{"name":"Engineering Research Center of Manufacturing Information of Gansu Province, Lanzhou, People\u2019s Republic of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3255-4158","authenticated-orcid":false,"given":"Jialin","family":"Ma","sequence":"additional","affiliation":[{"name":"College of Computer and Communication, Lanzhou University of Technology, Lanzhou, People\u2019s Republic of China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"301","published-online":{"date-parts":[[2023,5,18]]},"reference":[{"key":"e_1_3_2_2_1","unstructured":"Alam J. (2014). Advance traffic light system based on congestion estimation using fuzzy logic."},{"key":"e_1_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2017.09.020"},{"key":"e_1_3_2_4_1","unstructured":"Casas N. (2017). Deep reinforcement learning for urban traffic light control."},{"key":"e_1_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2901791"},{"key":"e_1_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2017.07.003"},{"key":"e_1_3_2_7_1","doi-asserted-by":"crossref","unstructured":"Gartner N. H. (1983). OPAC: A demand-responsive strategy for traffic signal control.","DOI":"10.23919\/ACC.1982.4787916"},{"key":"e_1_3_2_8_1","doi-asserted-by":"crossref","unstructured":"Henry J.-J. Farges J. L. & Tuffal J. (1984). The PRODYN real time traffic algorithm In Control in transportation systems Elsevier 1984 pp.\u00a0305\u2013310.","DOI":"10.1016\/B978-0-08-029365-3.50048-1"},{"issue":"4","key":"e_1_3_2_9_1","article-title":"The SCOOT on-line traffic signal optimisation technique","volume":"23","author":"Hunt P.","year":"1982","unstructured":"Hunt, P., Robertson, D., Bretherton, R., & Royle, M. C. (1982). The SCOOT on-line traffic signal optimisation technique. Traffic Engineering & Control, 23(4).","journal-title":"Traffic Engineering & Control"},{"key":"e_1_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2896943"},{"key":"e_1_3_2_11_1","doi-asserted-by":"crossref","unstructured":"Kulkarni G. H. & Waingankar P. G. (2007). Fuzzy logic based traffic light controller In 2007 International Conference on Industrial and Information Systems 2007 pp.\u00a0107\u2013110.","DOI":"10.1109\/ICIINFS.2007.4579157"},{"key":"e_1_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2890726"},{"key":"e_1_3_2_13_1","unstructured":"Lin Y. Dai X. Li L. & Wang F.-Y. (2018). An efficient deep reinforcement learning model for urban traffic control ArXiv Prepr. ArXiv180801876."},{"issue":"1","key":"e_1_3_2_14_1","article-title":"Two traffic-responsive area traffic control methods: SCAT and SCOOT","volume":"25","author":"Luk J.","year":"1984","unstructured":"Luk, J. (1984). Two traffic-responsive area traffic control methods: SCAT and SCOOT. Traffic Engineering & Control, 25(1).","journal-title":"Traffic Engineering & Control"},{"key":"e_1_3_2_15_1","unstructured":"Mnih V. et\u00a0al. (2013). Playing atari with deep reinforcement learning ArXiv Prepr. ArXiv13125602."},{"key":"e_1_3_2_16_1","unstructured":"Mnih V. et\u00a0al. (2016). Asynchronous methods for deep reinforcement learning In International conference on machine learning 2016 pp.\u00a01928\u20131937."},{"key":"e_1_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_18_1","unstructured":"Moalla D. Elkosantini S. & Darmoul S. (2013). An artificial immune network to control traffic at a single intersection In Proceedings of 2013 International Conference on Industrial Engineering and Systems Management (IESM) 2013 pp.\u00a01\u20137."},{"key":"e_1_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-8667.2010.00715.x"},{"key":"e_1_3_2_20_1","unstructured":"Schutera M. Goby N. Smolarek S. & Reischl M. (2018). Distributed traffic light control at uncoupled intersections with real-world topology by deep reinforcement learning ArXiv Prepr. ArXiv181111233."},{"key":"e_1_3_2_21_1","unstructured":"Silver D. Lever G. Heess N. Degris T. Wierstra D. & Riedmiller M. (2014). Deterministic policy gradient algorithms."},{"key":"e_1_3_2_22_1","doi-asserted-by":"crossref","unstructured":"Sun D. Benekohal R. F. & Waller S. T. (2003). Multiobjective traffic signal timing optimization using non-dominated sorting genetic algorithm In IEEE IV2003 Intelligent Vehicles Symposium. Proceedings (Cat. No. 03TH8683) 2003 pp.\u00a0198\u2013203.","DOI":"10.1109\/IVS.2003.1212908"},{"key":"e_1_3_2_23_1","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"Sutton R. S.","year":"1999","unstructured":"Sutton, R. S., McAllester, D., Singh, S., & Mansour, Y. (1999). Policy gradient methods for reinforcement learning with function approximation. Advances in Neural Information Processing Systems, 12, 1057\u20131063.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2904742"},{"key":"e_1_3_2_25_1","doi-asserted-by":"crossref","unstructured":"Van Hasselt H. Guez A. & Silver D. (2016). Deep reinforcement learning with double q-learning.","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.3390\/e21080744"},{"key":"e_1_3_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"e_1_3_2_28_1","doi-asserted-by":"crossref","unstructured":"Wei H. Xu N. Zhang H. et\u00a0al. (2019). Colight: Learning network-level cooperation for traffic signal control. In Proceedings of the 28th ACM International Conference on Information and Knowledge Management pp.\u00a01913\u20131922.","DOI":"10.1145\/3357384.3357902"},{"key":"e_1_3_2_29_1","doi-asserted-by":"crossref","unstructured":"Wei H. Zheng G. Yao H. & Li Z. (2018). Intellilight: A reinforcement learning approach for intelligent traffic light control. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining 2018 pp.\u00a02496\u20132505.","DOI":"10.1145\/3219819.3220096"},{"key":"e_1_3_2_30_1","unstructured":"Zheng G. et\u00a0al. (2019). Diagnosing reinforcement learning for traffic signal control ArXiv Prepr. ArXiv190504716."}],"container-title":["Journal of Control and Decision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/23307706.2023.2195408","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,12]],"date-time":"2025-01-12T06:41:35Z","timestamp":1736664095000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/23307706.2023.2195408"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,18]]},"references-count":29,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1,2]]}},"alternative-id":["10.1080\/23307706.2023.2195408"],"URL":"https:\/\/doi.org\/10.1080\/23307706.2023.2195408","relation":{},"ISSN":["2330-7706","2330-7714"],"issn-type":[{"value":"2330-7706","type":"print"},{"value":"2330-7714","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5,18]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tjcd20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tjcd20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2023-05-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}