{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:49:12Z","timestamp":1740098952260,"version":"3.37.3"},"publisher-location":"Cham","reference-count":15,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319694559"},{"type":"electronic","value":"9783319694566"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-69456-6_18","type":"book-chapter","created":{"date-parts":[[2017,10,18]],"date-time":"2017-10-18T10:04:34Z","timestamp":1508321074000},"page":"213-222","source":"Crossref","is-referenced-by-count":0,"title":["What Does a Policy Network Learn After Mastering a Pong Game?"],"prefix":"10.1007","author":[{"given":"Somnuk","family":"Phon-Amnuaisuk","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,10,19]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, 2nd edn. MIT Press, Cambridge (2017)","DOI":"10.1007\/978-1-4899-7687-1_720"},{"issue":"2","key":"18_CR2","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G Tesauro","year":"1994","unstructured":"Tesauro, G.: TD-Gammon, a self-teaching backgammon program, achieves master-level play. Neural Comput. 6(2), 215\u2013219 (1994)","journal-title":"Neural Comput."},{"key":"18_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1007\/978-3-642-20525-5_14","volume-title":"Applications of Evolutionary Computation","author":"S Phon-Amnuaisuk","year":"2011","unstructured":"Phon-Amnuaisuk, S.: Learning chasing behaviours of non-player characters in games using SARSA. In: Chio, C., et al. (eds.) EvoApplications 2011. LNCS, vol. 6624, pp. 133\u2013142. Springer, Heidelberg (2011). doi: 10.1007\/978-3-642-20525-5_14"},{"issue":"7587","key":"18_CR4","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., Guez, A., Sifre, L., van den Driessche, G., Schrittweiser, J., Antonofglou, I., Panneershelvam, V., Lanctot, M., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"18_CR5","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521, 436\u2013444 (2015)","journal-title":"Nature"},{"key":"18_CR6","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., Petersen, S., Beattie, C., Sadik, A., Antonoglou, I., King, H., Kumaran, D., Wierstra, D., Legg, S., Hassabis, D.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"key":"18_CR7","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare, M.G., Naddaf, Y., Veness, J., Bowling, M.: The arcade learning environment: an evaluation platform for general agents. J. Artif. Intell. Res. 47, 253\u2013279 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"18_CR8","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp. 1097\u20131105 (2012)"},{"key":"18_CR9","unstructured":"Sukhbaatar, S., Szlam, A., Weston, J., Fergus, R.: End-to-end memory networks. In: Advances in Neural Information Processing Systems, pp. 2440\u20132448 (2015)"},{"key":"18_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1007\/978-3-319-10590-1_53","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MD Zeiler","year":"2014","unstructured":"Zeiler, M.D., Fergus, R.: Visualizing and understanding convolutional networks. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8689, pp. 818\u2013833. Springer, Cham (2014). doi: 10.1007\/978-3-319-10590-1_53"},{"issue":"3","key":"18_CR11","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3), 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Phon-Amnuaisuk, S.: Evolving and discovering Tetris gameplay strategies. In: Proceedings of the 19th Annual Conference on Knowledge-Based and Intelligent Information and Engineering Systems (KES 2015), vol. 60, pp. 458\u2013467 (2015). Procedia Comput. Sci","DOI":"10.1016\/j.procs.2015.08.167"},{"key":"18_CR13","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 387\u2013395 (2014)"},{"key":"18_CR14","unstructured":"Andrychowicz, M., Denil, M., Colmenarejo, S.G., Hoffman, M.W., Pfau, D., Schaul, T., Shillingford, B., de Freitas, N.: Learning to learn by gradient descent by gradient descent. In: Advances in Neural Information Processing Systems, pp. 3981\u20133989 (2016)"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Williams, R.J.: On the use of backpropagation in associative reinforcement learning. In: Proceedings of the IEEE International Conference on Neural Networks, vol. I, pp. 263\u2013270 (1988)","DOI":"10.1109\/ICNN.1988.23856"}],"container-title":["Lecture Notes in Computer Science","Multi-disciplinary Trends in Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-69456-6_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,4]],"date-time":"2019-10-04T16:58:37Z","timestamp":1570208317000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-69456-6_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319694559","9783319694566"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-69456-6_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}