{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T02:45:59Z","timestamp":1725936359261},"publisher-location":"Cham","reference-count":18,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319708355"},{"type":"electronic","value":"9783319708362"}],"license":[{"start":{"date-parts":[[2017,12,21]],"date-time":"2017-12-21T00:00:00Z","timestamp":1513814400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-70836-2_11","type":"book-chapter","created":{"date-parts":[[2017,12,20]],"date-time":"2017-12-20T18:19:38Z","timestamp":1513793978000},"page":"129-140","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Mixed-Policy Asynchronous Deep Q-Learning"],"prefix":"10.1007","author":[{"given":"David","family":"Sim\u00f5es","sequence":"first","affiliation":[]},{"given":"Nuno","family":"Lau","sequence":"additional","affiliation":[]},{"given":"Lu\u00eds Paulo","family":"Reis","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,12,21]]},"reference":[{"key":"11_CR1","unstructured":"Abdallah, S., Lesser, V.R.: A multiagent reinforcement learning algorithm with non-linear dynamics. CoRR abs\/1401.3454 (2014)"},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Abdolmaleki, A., Simoes, D., Lau, N., Reis, L.P., Neumann, G.: Learning a humanoid kick with controlled distance. In: Behnke, S., Lee, D.D., Sariel, S., Sheh, R. (eds.) RoboCup 2016: Robot World Cup XX. Lecture Notes in Artificial Intelligence, Leipzig, Germany. Springer (2016)","DOI":"10.1007\/978-3-319-68792-6_4"},{"key":"11_CR3","doi-asserted-by":"crossref","unstructured":"Awheda, M.D., Schwartz, H.M.: Exponential moving average q-learning algorithm. In: 2013 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL), pp. 31\u201338, April 2013","DOI":"10.1109\/ADPRL.2013.6614986"},{"issue":"3","key":"11_CR4","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1007\/s10458-007-9013-x","volume":"15","author":"B Banerjee","year":"2007","unstructured":"Banerjee, B., Peng, J.: Generalized multiagent learning with performance bound. Auton. Agents Multi Agent Syst. 15(3), 281\u2013312 (2007)","journal-title":"Auton. Agents Multi Agent Syst."},{"key":"11_CR5","unstructured":"Bowling, M.: Convergence and no-regret in multiagent learning. In: Proceedings of the 17th International Conference on Neural Information Processing Systems, NIPS 2004, pp. 209\u2013216. MIT Press, Cambridge (2004)"},{"key":"11_CR6","unstructured":"Bowling, M., Veloso, M.: Rational and convergent learning in stochastic games. In: International Joint Conference on Artificial Intelligence, vol. 17, pp. 1021\u20131026. Lawrence Erlbaum Associates Ltd. (2001)"},{"issue":"2","key":"11_CR7","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M Bowling","year":"2002","unstructured":"Bowling, M., Veloso, M.: Multiagent learning using a variable learning rate. Artif. Intell. 136(2), 215\u2013250 (2002)","journal-title":"Artif. Intell."},{"key":"11_CR8","unstructured":"Clevert, D.A., Unterthiner, T., Hochreiter, S.: Fast and accurate deep network learning by exponential linear units (elus) (2015). arXiv preprint: arXiv:1511.07289"},{"issue":"1\u20132","key":"11_CR9","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1007\/s10994-006-0143-1","volume":"67","author":"V Conitzer","year":"2007","unstructured":"Conitzer, V., Sandholm, T.: Awesome: a general multiagent learning algorithm that converges in self-play and learns a best response against stationary opponents. Mach. Learn. 67(1\u20132), 23\u201343 (2007)","journal-title":"Mach. Learn."},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Dorigo, M., Gambardella, L.: Ant-q: a reinforcement learning approach to the traveling salesman problem. In: Proceedings of ML 1995, Twelfth International Conference on Machine Learning, pp. 252\u2013260 (2016)","DOI":"10.1016\/B978-1-55860-377-6.50039-6"},{"key":"11_CR11","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks. In: Aistats, vol. 9, pp. 249\u2013256 (2010)"},{"key":"11_CR12","first-page":"1039","volume":"4","author":"J Hu","year":"2003","unstructured":"Hu, J., Wellman, M.P.: Nash q-learning for general-sum stochastic games. J. Mach. Learn. Res. 4, 1039\u20131069 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"11_CR13","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. CoRR abs\/1412.6980 (2014)"},{"key":"11_CR14","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937 (2016)"},{"issue":"7540","key":"11_CR15","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"issue":"28","key":"11_CR16","first-page":"307","volume":"2","author":"LS Shapley","year":"1953","unstructured":"Shapley, L.S.: A value for n-person games. Contrib. Theor. Games 2(28), 307\u2013317 (1953)","journal-title":"Contrib. Theor. Games"},{"key":"11_CR17","doi-asserted-by":"crossref","unstructured":"Simoes, D., Lau, N., Reis, L.P.: Multi-agent double deep q-networks. In: Portuguese Conference on Artificial Intelligence. Springer (2017)","DOI":"10.1007\/978-3-319-65340-2_11"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Zhang, C., Lesser, V.: Multi-agent learning with policy prediction. In: Proceedings of the Twenty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2010, pp. 927\u2013934. AAAI Press (2010)","DOI":"10.1609\/aaai.v24i1.7639"}],"container-title":["Advances in Intelligent Systems and Computing","ROBOT 2017: Third Iberian Robotics Conference"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-70836-2_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,30]],"date-time":"2023-08-30T03:11:52Z","timestamp":1693365112000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-70836-2_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12,21]]},"ISBN":["9783319708355","9783319708362"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-70836-2_11","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2017,12,21]]}}}