{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T21:19:19Z","timestamp":1757452759711,"version":"3.40.3"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030898984"},{"type":"electronic","value":"9783030898991"}],"license":[{"start":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:00:00Z","timestamp":1634688000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:00:00Z","timestamp":1634688000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-89899-1_21","type":"book-chapter","created":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T22:38:52Z","timestamp":1634683132000},"page":"207-216","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Learning to Communicate with Reinforcement Learning for an Adaptive Traffic Control System"],"prefix":"10.1007","author":[{"given":"Simon","family":"Vanneste","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gauthier","family":"de Borrekens","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stig","family":"Bosmans","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Astrid","family":"Vanneste","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kevin","family":"Mets","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Siegfried","family":"Mercelis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Steven","family":"Latr\u00e9","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Hellinckx","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,10,20]]},"reference":[{"key":"21_CR1","unstructured":"Commission, E.: Roadmap to a Single European Transport Area: Towards a Competitive and Resource Efficient Transport System: White Paper. Publications Office of the European Union (2011)"},{"issue":"3","key":"21_CR2","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1109\/TITS.2013.2255286","volume":"14","author":"S El-Tantawy","year":"2013","unstructured":"El-Tantawy, S., Abdulhai, B., Abdelgawad, H.: Multiagent reinforcement learning for integrated network of adaptive traffic signal controllers (marlin-atsc): methodology and large-scale application on downtown toronto. IEEE Trans. Intell. Transp. Syst. 14(3), 1140\u20131150 (2013)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"21_CR3","unstructured":"Foerster, J.N., Assael, Y.M., De\u00a0Freitas, N., Whiteson, S.: Learning to communicate with deep multi-agent reinforcement learning. arXiv preprint arXiv:1605.06676 (2016)"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Kok, J.R., Vlassis, N.: Using the max-plus algorithm for multiagent decision making in coordination graphs. In: Robot Soccer World Cup, pp. 1\u201312. Springer (2005)","DOI":"10.1007\/11780519_1"},{"key":"21_CR5","unstructured":"Liang, E., et al.: RLlib: abstractions for distributed reinforcement learning. In: International Conference on Machine Learning (ICML) (2018)"},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Lopez, P.A., et al.: Microscopic traffic simulation using sumo. In: The 21st IEEE International Conference on Intelligent Transportation Systems. IEEE (2018). https:\/\/elib.dlr.de\/124092\/","DOI":"10.1109\/ITSC.2018.8569938"},{"key":"21_CR7","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. arXiv preprint arXiv:1706.02275 (2017)"},{"key":"21_CR8","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Oliehoek, F.A., Amato, C.: A concise introduction to decentralized POMDPs. Springer (2016)","DOI":"10.1007\/978-3-319-28929-8"},{"key":"21_CR10","unstructured":"Van\u00a0der Pol, E., Oliehoek, F.A.: Coordinated deep reinforcement learners for traffic light control. In: Proceedings of Learning, Inference and Control of Multi-Agent Systems (at NIPS 2016) (2016)"},{"key":"21_CR11","unstructured":"Sukhbaatar, S., Szlam, A., Fergus, R.: Learning multiagent communication with backpropagation. arXiv preprint arXiv:1605.07736 (2016)"},{"key":"21_CR12","unstructured":"Sunehag, P., et\u00a0al.: Value-decomposition networks for cooperative multi-agent learning. arXiv preprint arXiv:1706.05296 (2017)"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Tan, M.: Multi-agent reinforcement learning: Independent vs. cooperative agents. In: Proceedings of the Tenth International Conference on Machine Learning, pp. 330\u2013337 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"issue":"6","key":"21_CR14","doi-asserted-by":"publisher","first-page":"2687","DOI":"10.1109\/TCYB.2019.2904742","volume":"50","author":"T Tan","year":"2019","unstructured":"Tan, T., Bao, F., Deng, Y., Jin, A., Dai, Q., Wang, J.: Cooperative deep reinforcement learning for large-scale traffic grid signal control. IEEE Trans. Cybern. 50(6), 2687\u20132700 (2019)","journal-title":"IEEE Trans. Cybern."},{"key":"21_CR15","unstructured":"Thorpe, T.L.: Vehicle traffic light control using sarsa. Technical report. citeseer.ist.psu.edu\/thorpe97vehicle.html (1997)"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Vanneste, S., Vanneste, A., Bosmans, S., Mercelis, S., Hellinckx, P.: Learning to communicate with multi-agent reinforcement learning using value-decomposition networks. In: International Conference on P2P, Parallel, Grid, Cloud and Internet Computing, pp. 736\u2013745. Springer (2019)","DOI":"10.1007\/978-3-030-33509-0_69"},{"key":"21_CR17","unstructured":"Vanneste, S., Vanneste, A., Mercelis, S., Hellinckx, P.: Learning to communicate using counterfactual reasoning. arXiv preprint arXiv:2006.07200 (2020)"},{"key":"21_CR18","unstructured":"Wu, C., Kreidieh, A., Parvate, K., Vinitsky, E., Bayen, A.M.: Flow: A modular learning framework for autonomy in traffic. arXiv preprint arXiv:1710.05465 (2017)"},{"key":"21_CR19","unstructured":"Zheng, G., et al.: Diagnosing reinforcement learning for traffic signal control. arXiv preprint arXiv:1905.04716 (2019)"}],"container-title":["Lecture Notes in Networks and Systems","Advances on P2P, Parallel, Grid, Cloud and Internet Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-89899-1_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:16:33Z","timestamp":1634688993000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-89899-1_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,20]]},"ISBN":["9783030898984","9783030898991"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-89899-1_21","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"type":"print","value":"2367-3370"},{"type":"electronic","value":"2367-3389"}],"subject":[],"published":{"date-parts":[[2021,10,20]]},"assertion":[{"value":"20 October 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"3PGCIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on P2P, Parallel, Grid, Cloud and Internet Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Fukuoka","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 October 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pgcic2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/voyager.ce.fit.ac.jp\/conf\/3pgcic\/2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}