{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T18:37:48Z","timestamp":1771612668265,"version":"3.50.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T00:00:00Z","timestamp":1711670400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T00:00:00Z","timestamp":1711670400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1007\/s11227-024-06047-3","type":"journal-article","created":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T17:01:35Z","timestamp":1711731695000},"page":"15161-15182","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Dual experience replay-based TD3 for single intersection signal control"],"prefix":"10.1007","volume":"80","author":[{"given":"Yichao","family":"Gao","sequence":"first","affiliation":[]},{"given":"Dake","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Yaqi","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,29]]},"reference":[{"key":"6047_CR1","unstructured":"Li L, Feiyue W (2018) A century review and future prospect of ground traffic control. J Autom 44(4):7 (in Chinese)"},{"issue":"1","key":"6047_CR2","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1109\/TITS.2020.3014296","volume":"23","author":"PW Shaikh","year":"2020","unstructured":"Shaikh PW, El-Abd M, Khanafer M, Gao K (2020) A review on swarm intelligence and evolutionary algorithms for solving the traffic signal control problem. IEEE Trans Intell Transp Syst 23(1):48\u201363","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"6047_CR3","unstructured":"Webster FV (1958) Traffic signal settings. Tech. Rep"},{"key":"6047_CR4","unstructured":"Quan L, Jianwei Z, Zongchang Z et al (2018) A review of deep reinforcement learning. J Comput Sci 41(1):27 (in Chinese)"},{"key":"6047_CR5","unstructured":"Sutton RS, Barto AG et\u00a0al (1998) Introduction to reinforcement learning, vol 135. MIT Press, Cambridge"},{"key":"6047_CR6","unstructured":"Dongwei X, Lei Z, Da W et al (2022) A review of urban traffic signal control based on deep reinforcement learning. J Transp Eng Inf, pp 020-001 (in Chinese)"},{"key":"6047_CR7","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, PMLR, pp 1587\u20131596"},{"key":"6047_CR8","unstructured":"Thorpe TL, Anderson CW (1996) Tra c light control using sarsa with three state representations. Technical report, Citeseer"},{"issue":"2","key":"6047_CR9","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1049\/iet-its.2009.0070","volume":"4","author":"I Arel","year":"2010","unstructured":"Arel I, Liu C, Urbanik T, Kohls AG (2010) Reinforcement learning-based multi-agent system for network traffic signal control. IET Intell Transp Syst 4(2):128\u2013135","journal-title":"IET Intell Transp Syst"},{"issue":"7540","key":"6047_CR10","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"6047_CR11","unstructured":"Genders W, Razavi S (2016) Using a deep reinforcement learning agent for traffic signal control. arXiv preprint arXiv:1611.01142"},{"issue":"7","key":"6047_CR12","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1049\/iet-its.2017.0153","volume":"11","author":"SS Mousavi","year":"2017","unstructured":"Mousavi SS, Schukat M, Howley E (2017) Traffic light control using deep policy-gradient and value-function-based reinforcement learning. IET Intell Transp Syst 11(7):417\u2013423","journal-title":"IET Intell Transp Syst"},{"issue":"6","key":"6047_CR13","doi-asserted-by":"publisher","first-page":"2687","DOI":"10.1109\/TCYB.2019.2904742","volume":"50","author":"T Tan","year":"2019","unstructured":"Tan T, Bao F, Deng Y, Jin A, Dai Q, Wang J (2019) Cooperative deep reinforcement learning for large-scale traffic grid signal control. IEEE Trans Cybern 50(6):2687\u20132700","journal-title":"IEEE Trans Cybern"},{"key":"6047_CR14","doi-asserted-by":"publisher","first-page":"108497","DOI":"10.1016\/j.asoc.2022.108497","volume":"119","author":"G Kim","year":"2022","unstructured":"Kim G, Sohn K (2022) Area-wide traffic signal control based on a deep graph Q-network (DGQN) trained in an asynchronous manner. Appl Soft Comput 119:108497","journal-title":"Appl Soft Comput"},{"issue":"2","key":"6047_CR15","doi-asserted-by":"publisher","first-page":"1243","DOI":"10.1109\/TVT.2018.2890726","volume":"68","author":"X Liang","year":"2019","unstructured":"Liang X, Du X, Wang G, Han Z (2019) A deep reinforcement learning network for traffic light cycle control. IEEE Trans Veh Technol 68(2):1243\u20131253","journal-title":"IEEE Trans Veh Technol"},{"key":"6047_CR16","unstructured":"Zhi L, Shipeng C, Yang S et al (2020) Single intersection signal control based on improved deep reinforcement learning method. Comput Sci 47(12):7 (in Chinese)"},{"key":"6047_CR17","unstructured":"Lijun L, Zhou W, Zhen Y (2021) An improved deep deterministic policy gradient network traffic signal control system. J Sichuan Univ (Nat Sci Edn) 058(004):87\u201393 (in Chinese)"},{"issue":"10","key":"6047_CR18","doi-asserted-by":"publisher","first-page":"1269","DOI":"10.1049\/itr2.12208","volume":"16","author":"M Mileti\u0107","year":"2022","unstructured":"Mileti\u0107 M, Ivanjko E, Greguri\u0107 M, Ku\u0161i\u0107 K (2022) A review of reinforcement learning applications in adaptive traffic signal control. IET Intell Transp Syst 16(10):1269\u20131285","journal-title":"IET Intell Transp Syst"},{"key":"6047_CR19","doi-asserted-by":"crossref","unstructured":"Van Otterlo M, Wiering M (2012) Reinforcement learning and markov decision processes. In: Reinforcement learning: state-of-the-art. Springer, Berlin, pp 3\u201342","DOI":"10.1007\/978-3-642-27645-3_1"},{"key":"6047_CR20","unstructured":"Wang Z, Bapst V, Heess N, Mnih V, Munos R, Kavukcuoglu K, de\u00a0Freitas N (2016) Sample efficient actor-critic with experience replay. arXiv preprint arXiv:1611.01224"},{"key":"6047_CR21","unstructured":"Munos R, Stepleton T, Harutyunyan A, Bellemare M (2016) Safe and efficient off-policy reinforcement learning. Adv Neural Inf Process Syst 29"},{"key":"6047_CR22","unstructured":"Wu Y, Mansimov E, Grosse RB, Liao S, Ba J (2017) Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation. Adv Neural Inf Process Syst 30"},{"key":"6047_CR23","unstructured":"Doerr A, Volpp M, Toussaint M, Sebastian T, Daniel C (2019) Trajectory-based off-policy deep reinforcement learning. In: International conference on machine learning, PMLR, pp 1636\u20131645"},{"key":"6047_CR24","doi-asserted-by":"publisher","first-page":"108875","DOI":"10.1016\/j.patcog.2022.108875","volume":"131","author":"M Li","year":"2022","unstructured":"Li M, Huang T, Zhu W (2022) Clustering experience replay for the effective exploitation in reinforcement learning. Pattern Recogn 131:108875","journal-title":"Pattern Recogn"},{"issue":"24","key":"6047_CR25","doi-asserted-by":"publisher","first-page":"4192","DOI":"10.3390\/electronics11244192","volume":"11","author":"SW Beyene","year":"2022","unstructured":"Beyene SW, Han J-H (2022) Prioritized hindsight with dual buffer for meta-reinforcement learning. Electronics 11(24):4192","journal-title":"Electronics"},{"key":"6047_CR26","doi-asserted-by":"crossref","unstructured":"Wei H, Chen C, Zheng, G Wu K, Gayah V, Xu K, Li Z (2019) Presslight: learning max pressure control to coordinate traffic signals in arterial network. In: Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery & data mining, pp 1290\u20131298","DOI":"10.1145\/3292500.3330949"},{"issue":"8","key":"6047_CR27","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"6047_CR28","doi-asserted-by":"crossref","unstructured":"Luong M-T, Pham H, Manning CD (2015) Effective approaches to attention-based neural machine translation. arXiv preprint arXiv:1508.04025","DOI":"10.18653\/v1\/D15-1166"},{"key":"6047_CR29","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347"},{"key":"6047_CR30","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-024-06047-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-024-06047-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-024-06047-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,25]],"date-time":"2024-06-25T11:24:42Z","timestamp":1719314682000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-024-06047-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,29]]},"references-count":30,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["6047"],"URL":"https:\/\/doi.org\/10.1007\/s11227-024-06047-3","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,29]]},"assertion":[{"value":"5 March 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 March 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}