{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:18:09Z","timestamp":1771697889099,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030319779","type":"print"},{"value":"9783030319786","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-31978-6_8","type":"book-chapter","created":{"date-parts":[[2019,9,24]],"date-time":"2019-09-24T09:03:16Z","timestamp":1569315796000},"page":"90-105","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Deep Multi-agent Reinforcement Learning in a Homogeneous Open Population"],"prefix":"10.1007","author":[{"given":"Roxana","family":"R\u0103dulescu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Manon","family":"Legrand","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kyriakos","family":"Efthymiadis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Diederik M.","family":"Roijers","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ann","family":"Now\u00e9","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,9,25]]},"reference":[{"key":"8_CR1","doi-asserted-by":"crossref","unstructured":"Amato, C., Oliehoek, F.A.: Scalable planning and learning for multiagent POMDPs. In: AAAI, pp. 1995\u20132002 (2015)","DOI":"10.1609\/aaai.v29i1.9439"},{"key":"8_CR2","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/978-3-642-29946-9_25","volume-title":"Recent Advances in Reinforcement Learning","author":"G Boutsioukis","year":"2012","unstructured":"Boutsioukis, G., Partalas, I., Vlahavas, I.: Transfer learning in multi-agent reinforcement learning domains. In: Sanner, S., Hutter, M. (eds.) EWRL 2011. LNCS (LNAI), vol. 7188, pp. 249\u2013260. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-29946-9_25"},{"issue":"2","key":"8_CR3","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., Schutter, B.D.: A comprehensive survey of multiagent reinforcement learning. IEEE Trans Syst. Man Cybern. Part C 38(2), 156\u2013172 (2008)","journal-title":"IEEE Trans Syst. Man Cybern. Part C"},{"key":"8_CR4","unstructured":"De Hauwere, Y.M.: Sparse interactions in multi-agent reinforcement learning. Ph.D. thesis, Vrije Universiteit Brussel (2011)"},{"issue":"Jul","key":"8_CR5","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi, J., Hazan, E., Singer, Y.: Adaptive subgradient methods for online learning and stochastic optimization. J. Mach. Learn. Res. 12(Jul), 2121\u20132159 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"8_CR6","unstructured":"Espeholt, L., et al.: IMPALA: scalable distributed deep-RL with importance weighted actor-learner architectures. arXiv preprint arXiv:1802.01561 (2018)"},{"key":"8_CR7","unstructured":"Foerster, J., Assael, Y.M., de Freitas, N., Whiteson, S.: Learning to communicate with deep multi-agent reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 2137\u20132145 (2016)"},{"key":"8_CR8","unstructured":"Foerster, J., et al.: Stabilising experience replay for deep multi-agent reinforcement learning. arXiv preprint arXiv:1702.08887 (2017)"},{"key":"8_CR9","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1007\/978-3-319-71682-4_5","volume-title":"Autonomous Agents and Multiagent Systems","author":"JK Gupta","year":"2017","unstructured":"Gupta, J.K., Egorov, M., Kochenderfer, M.: Cooperative multi-agent control using deep reinforcement learning. In: Sukthankar, G., Rodriguez-Aguilar, J.A. (eds.) AAMAS 2017. LNCS (LNAI), vol. 10642, pp. 66\u201383. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-71682-4_5"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Heinerman, J., Rango, M., Eiben, A.E.: Evolution, individual learning, and social learning in a swarm of real robots. In: 2015 IEEE Symposium Series on Computational Intelligence, pp. 1055\u20131062. IEEE (2015)","DOI":"10.1109\/SSCI.2015.152"},{"key":"8_CR11","unstructured":"Legrand, M.: Deep reinforcement learning for autonomous vehicle control among human drivers. Master dissertation, Vrije Universiteit Brussel (2017). http:\/\/ai.vub.ac.be\/sites\/default\/files\/thesis_legrand.pdf"},{"key":"8_CR12","first-page":"394","volume":"2017","author":"M Legrand","year":"2017","unstructured":"Legrand, M., R\u0103dulescu, R., Roijers, D.M., Now\u00e9, A.: The SimuLane highway traffic simulator for multi-agent reinforcement learning. BNAIC 2017, 394\u2013395 (2017)","journal-title":"BNAIC"},{"key":"8_CR13","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"issue":"3\u20134","key":"8_CR14","first-page":"293","volume":"8","author":"LJ Lin","year":"1992","unstructured":"Lin, L.J.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Mach. Learn. 8(3\u20134), 293\u2013321 (1992)","journal-title":"Mach. Learn."},{"issue":"1","key":"8_CR15","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/S1389-0417(01)00015-8","volume":"2","author":"ML Littman","year":"2001","unstructured":"Littman, M.L.: Value-function reinforcement learning in Markov games. Cogn. Syst. Res. 2(1), 55\u201366 (2001)","journal-title":"Cogn. Syst. Res."},{"key":"8_CR16","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, O.P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in Neural Information Processing Systems, pp. 6382\u20136393 (2017)"},{"key":"8_CR17","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. CoRR abs\/1602.01783 (2016)"},{"key":"8_CR18","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. CoRR abs\/1312.5602 (2013)"},{"issue":"7540","key":"8_CR19","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"Mordatch, I., Abbeel, P.: Emergence of grounded compositional language in multi-agent populations. arXiv preprint arXiv:1703.04908 (2017)","DOI":"10.1609\/aaai.v32i1.11492"},{"key":"8_CR21","unstructured":"Mossalam, H., Assael, Y., Roijers, D., Whiteson, S.: Multi-objective deep reinforcement learning. In: NIPS Workshop on Deep RL (2016)"},{"key":"8_CR22","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1007\/978-3-642-27645-3_14","volume-title":"Reinforcement Learning: State of the Art","author":"A Now\u00e9","year":"2012","unstructured":"Now\u00e9, A., Vrancx, P., De Hauwere, Y.M.: Game theory and multi-agent reinforcement learning. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning: State of the Art, pp. 441\u2013470. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-27645-3_14"},{"key":"8_CR23","unstructured":"Rusu, A.A., et al.: Progressive neural networks. arXiv preprint arXiv:1606.04671 (2016)"},{"key":"8_CR24","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: International Conference on Machine Learning, pp. 1889\u20131897 (2015)"},{"issue":"7587","key":"8_CR25","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"8_CR26","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15, 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"8_CR27","first-page":"4099","volume":"2018","author":"D Steckelmacher","year":"2018","unstructured":"Steckelmacher, D., Roijers, D.M., Harutyunyan, A., Vrancx, P., Plisnier, H., Now\u00e9, A.: Reinforcement learning in POMDPs with memoryless options and option-observation initiation sets. AAAI 2018, 4099\u20134106 (2018)","journal-title":"AAAI"},{"key":"8_CR28","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"issue":"Jul","key":"8_CR29","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: a survey. J. Mach. Learn. Res. 10(Jul), 1633\u20131685 (2009)","journal-title":"J. Mach. Learn. Res."},{"key":"8_CR30","first-page":"2094","volume":"16","author":"H Hasselt Van","year":"2016","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double Q-learning. AAAI 16, 2094\u20132100 (2016)","journal-title":"AAAI"},{"key":"8_CR31","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. thesis, University of Cambridge England (1989)"},{"key":"8_CR32","unstructured":"Wiggers, A.J., Oliehoek, F.A., Roijers, D.M.: Structure in the value function of two-player zero-sum games of incomplete information. In: ECAI 2016, pp. 1628\u20131629 (2016)"},{"key":"8_CR33","unstructured":"Zhang, C., Lesser, V.: Coordinating multi-agent reinforcement learning with limited communication. In: Proceedings of the 2013 International Conference on Autonomous Agents and Multi-agent Systems, pp. 1101\u20131108 (2013)"}],"container-title":["Communications in Computer and Information Science","Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-31978-6_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,29]],"date-time":"2022-09-29T14:05:47Z","timestamp":1664460347000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-31978-6_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030319779","9783030319786"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-31978-6_8","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"25 September 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"BNAIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Benelux Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"\u2018s-Hertogenbosch","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 November 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 November 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"bnaic2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/bnaic2018.nl","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}