{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T04:02:31Z","timestamp":1773460951565,"version":"3.50.1"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319653396","type":"print"},{"value":"9783319653402","type":"electronic"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-65340-2_11","type":"book-chapter","created":{"date-parts":[[2017,8,8]],"date-time":"2017-08-08T11:49:29Z","timestamp":1502192969000},"page":"123-134","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Multi-agent Double Deep Q-Networks"],"prefix":"10.1007","author":[{"given":"David","family":"Sim\u00f5es","sequence":"first","affiliation":[]},{"given":"Nuno","family":"Lau","sequence":"additional","affiliation":[]},{"given":"Lu\u00eds Paulo","family":"Reis","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,8,9]]},"reference":[{"key":"11_CR1","doi-asserted-by":"crossref","unstructured":"Becker, R., Zilberstein, S., Lesser, V., Goldman, C.V.: Transition-independent decentralized markov decision processes. In: Proceedings of the Second International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS 2003, pp. 41\u201348. ACM, New York (2003)","DOI":"10.1145\/860575.860583"},{"issue":"2","key":"11_CR2","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., De Schutter, B.: A comprehensive survey of multiagent reinforcement learning. Trans. Syst. Man Cybern. Part C 38(2), 156\u2013172 (2008)","journal-title":"Trans. Syst. Man Cybern. Part C"},{"key":"11_CR3","unstructured":"Claus, C., Boutilier, C.: The dynamics of reinforcement learning in cooperative multiagent systems. In: Innovative Applications of Artificial Intelligence, IAAI 1998, pp. 746\u2013752. American Association for Artificial Intelligence (1998)"},{"key":"11_CR4","unstructured":"Egorov, M.: Multi-agent deep reinforcement learning. University of Stanford, Department of Computer Science, Technical report (2016)"},{"key":"11_CR5","unstructured":"Foerster, J.N., Assael, Y.M., de Freitas, N., Whiteson, S.: Learning to communicate to solve riddles with deep distributed recurrent q-networks. CoRR abs\/1602.02672 (2016)"},{"key":"11_CR6","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks. In: AISTATS, vol. 9, pp. 249\u2013256 (2010)"},{"key":"11_CR7","unstructured":"van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. CoRR abs\/1509.06461 (2015)"},{"key":"11_CR8","unstructured":"Kapetanakis, S., Kudenko, D.: Reinforcement learning of coordination in cooperative multi-agent systems. In: Eighteenth National Conference on Artificial Intelligence, Menlo Park, CA, USA, pp. 326\u2013331. American Association for Artificial Intelligence (2002)"},{"key":"11_CR9","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. CoRR abs\/1412.6980 (2014)"},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Lau, N., Reis, L.P.: FC Portugal - high-level coordination methodologies in soccer robotics. InTech Education and Publishing, Vienna, December 2007","DOI":"10.5772\/5130"},{"key":"11_CR11","unstructured":"Lauer, M., Riedmiller, M.: An algorithm for distributed reinforcement learning in cooperative multi-agent systems. In: Proceedings of the Seventeenth International Conference on Machine Learning, pp. 535\u2013542. Morgan Kaufmann (2000)"},{"key":"11_CR12","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Playing atari with deep reinforcement learning. CoRR abs\/1312.5602 (2013)"},{"key":"11_CR13","unstructured":"Nair, R., Tambe, M., Yokoo, M., Pynadath, D., Marsella, S., Nair, R., Tambe, M.: Taming decentralized pomdps: towards efficient policy computation for multiagent settings. In: IJCAI, pp. 705\u2013711 (2003)"},{"key":"11_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1007\/3-540-44568-4_11","volume-title":"Balancing Reactivity and Social Deliberation in Multi-Agent Systems","author":"LP Reis","year":"2001","unstructured":"Reis, L.P., Lau, N., Oliveira, E.C.: Situation based strategic positioning for coordinating a team of homogeneous agents. BRSDMAS 2000. LNCS, vol. 2103, pp. 175\u2013197. Springer, Heidelberg (2001). doi:10.1007\/3-540-44568-4_11"},{"key":"11_CR15","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/4151.001.0001","volume-title":"Layered Learning in Multiagent Systems: A Winning Approach to Robotic Soccer","author":"P Stone","year":"2000","unstructured":"Stone, P.: Layered Learning in Multiagent Systems: A Winning Approach to Robotic Soccer. MIT Press, Cambridge (2000)"},{"issue":"3","key":"11_CR16","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1023\/A:1008942012299","volume":"8","author":"P Stone","year":"2000","unstructured":"Stone, P., Veloso, M.: Multiagent systems: a survey from a machine learning perspective. Auton. Robot. 8(3), 345\u2013383 (2000)","journal-title":"Auton. Robot."},{"key":"11_CR17","unstructured":"Tampuu, A., Matiisen, T., Kodelja, D., Kuzovkin, I., Korjus, K., Aru, J., Aru, J., Vicente, R.: Multiagent cooperation and competition with deep reinforcement learning. CoRR abs\/1511.08779 (2015)"},{"issue":"1","key":"11_CR18","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: a survey. J. Mach. Learn. Res. 10(1), 1633\u20131685 (2009)","journal-title":"J. Mach. Learn. Res."},{"issue":"3\u20134","key":"11_CR19","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."}],"container-title":["Lecture Notes in Computer Science","Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-65340-2_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T07:53:14Z","timestamp":1772783594000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-65340-2_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319653396","9783319653402"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-65340-2_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"9 August 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EPIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"EPIA Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 September 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"epia2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/web.fe.up.pt\/~epia2017\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}