{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T07:24:24Z","timestamp":1725953064689},"publisher-location":"Cham","reference-count":14,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319768915"},{"type":"electronic","value":"9783319768922"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-76892-2_11","type":"book-chapter","created":{"date-parts":[[2018,2,24]],"date-time":"2018-02-24T02:43:13Z","timestamp":1519440193000},"page":"145-160","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Learning to Play Donkey Kong Using Neural Networks and Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Paul","family":"Ozkohen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jelle","family":"Visser","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martijn","family":"van Otterlo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marco","family":"Wiering","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,2,25]]},"reference":[{"key":"11_CR1","unstructured":"Atkeson, C.G., Schaal, S.: Robot learning from demonstration. In: Proceedings of the International Conference on Machine Learning, pp. 12\u201320 (1997)"},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Baird, L.: Residual algorithms: reinforcement learning with function approximation. In: Proceedings of the Twelfth International Conference on Machine Learning, pp. 30\u201337 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"11_CR3","doi-asserted-by":"crossref","unstructured":"Bom, L., Henken, R., Wiering, M.: Reinforcement learning to train Ms. Pac-Man using higher-order action-relative inputs. In: IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (2013)","DOI":"10.1109\/ADPRL.2013.6615002"},{"key":"11_CR4","unstructured":"Donkey Kong fansite wiki. http:\/\/donkeykong.wikia.com\/wiki\/Nintendo . Accessed Sept 2017"},{"key":"11_CR5","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"key":"11_CR6","doi-asserted-by":"crossref","unstructured":"Shantia, A., Begue, E., Wiering, M.: Connectionist reinforcement learning for intelligent unit micro management in Starcraft. In: The 2011 International Joint Conference on Neural Networks, pp. 1794\u20131801 (2011)","DOI":"10.1109\/IJCNN.2011.6033442"},{"issue":"7587","key":"11_CR7","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., Guez, A., Sifre, L., van den Driessche, G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., Lanctot, M., Dieleman, S., Grewe, D., Nham, J., Kalchbrenner, N., Sutskever, I., Lillicrap, T., Leach, M., Kavukcuoglu, K., Graepel, T., Hassabis, D.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"issue":"1","key":"11_CR8","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton, R.S.: Learning to predict by the methods of temporal differences. Mach. Learn. 3(1), 9\u201344 (1988)","journal-title":"Mach. Learn."},{"key":"11_CR9","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. The MIT Press, Cambridge (1998)"},{"issue":"1","key":"11_CR10","doi-asserted-by":"crossref","first-page":"86","DOI":"10.3389\/neuro.01.014.2008","volume":"2","author":"Y Takahashi","year":"2008","unstructured":"Takahashi, Y., Schoenbaum, G., Niv, Y.: Silencing the critics: understanding the effects of cocaine sensitization on dorsolateral and ventral striatum in the context of an actor\/critic model. Front. Neurosci. 2(1), 86\u201399 (2008)","journal-title":"Front. Neurosci."},{"key":"11_CR11","unstructured":"van Seijen, H., Fatemi, M., Romoff, J., Laroche, R., Barnes, T., Tsang, J.: Hybrid reward architecture for reinforcement learning (2017). https:\/\/arxiv.org\/abs\/1706.04208"},{"key":"11_CR12","unstructured":"Watkins, C.J.: Learning from delayed rewards. Ph.D. Thesis, University of Cambridge, England (1989)"},{"key":"11_CR13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3","volume-title":"Reinforcement Learning: State of the Art","author":"M Wiering","year":"2012","unstructured":"Wiering, M., van Otterlo, M.: Reinforcement Learning: State of the Art. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-27645-3"},{"key":"11_CR14","doi-asserted-by":"crossref","unstructured":"Wiering, M.A., Van Hasselt, H.: Two novel on-policy reinforcement learning algorithms based on TD( $$\\lambda $$ \u03bb )-methods. In: 2007 IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning, pp. 280\u2013287 (2007)","DOI":"10.1109\/ADPRL.2007.368200"}],"container-title":["Communications in Computer and Information Science","Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-76892-2_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,11]],"date-time":"2019-10-11T19:40:55Z","timestamp":1570822855000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-76892-2_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319768915","9783319768922"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-76892-2_11","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2018]]}}}