{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T23:25:42Z","timestamp":1742945142224,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319759302"},{"type":"electronic","value":"9783319759319"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-75931-9_4","type":"book-chapter","created":{"date-parts":[[2018,2,14]],"date-time":"2018-02-14T04:27:50Z","timestamp":1518582470000},"page":"46-60","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Deep Reinforcement Learning with Hidden Layers on Future States"],"prefix":"10.1007","author":[{"given":"Hirotaka","family":"Kameko","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Suzuki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Naoki","family":"Mizukami","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yoshimasa","family":"Tsuruoka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,2,15]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare, M.G., Naddaf, Y., Veness, J., Bowling, M.: The arcade learning environment: an evaluation platform for general agents. J. Artif. Intell. Res. 47, 253\u2013279 (2013)","journal-title":"J. Artif. Intell. Res."},{"unstructured":"Bellemare, M., Srinivasan, S., Ostrovski, G., Schaul, T., Saxton, D., Munos, R.: Unifying count-based exploration and intrinsic motivation. In: Lee, D.D., Sugiyama, M., Luxburg, U.V., Guyon, I., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 29, pp. 1471\u20131479. Curran Associates, Inc. (2016)","key":"4_CR2"},{"doi-asserted-by":"crossref","unstructured":"Bellemare, M., Veness, J., Bowling, M.: Investigating contingency awareness using Atari 2600 games. In: AAAI Conference on Artificial Intelligence, pp. 864\u2013871 (2012)","key":"4_CR3","DOI":"10.1609\/aaai.v26i1.8321"},{"unstructured":"Guo, X., Singh, S., Lewis, R., Lee, H.: Deep learning for reward design to improve Monte Carlo tree search in Atari games. In: Proceedings of 25th International Joint Conference on Artificial Intelligence, pp. 1519\u20131525 (2016)","key":"4_CR4"},{"doi-asserted-by":"crossref","unstructured":"Hausknecht, M., Stone, P.: Deep recurrent Q-learning for partially observable MDPs. In: 2015 AAAI Fall Symposium Series, pp. 29\u201337 (2015)","key":"4_CR5","DOI":"10.1609\/aimag.v37i2.2661"},{"unstructured":"Jaderberg, M., Mnih, V., Czarnecki, W.M., Schaul, T., Leibo, J.Z., Silver, D., Kavukcuoglu, K.: Reinforcement learning with unsupervised auxiliary tasks. CoRR abs\/1611.05397 (2016). http:\/\/arxiv.org\/abs\/1611.05397","key":"4_CR6"},{"key":"4_CR7","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/11871842_29","volume-title":"Machine Learning: ECML 2006","author":"L Kocsis","year":"2006","unstructured":"Kocsis, L., Szepesv\u00e1ri, C.: Bandit based Monte-Carlo planning. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol. 4212, pp. 282\u2013293. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11871842_29"},{"unstructured":"Lin, L.J.: Reinforcement learning for robots using neural networks. Ph.D. thesis, Carnegie Mellon University (1992)","key":"4_CR8"},{"unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T.P., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33rd International Conference on Machine Learning (ICML), pp. 1928\u20131937 (2016)","key":"4_CR9"},{"unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Playing Atari with deep reinforcement learning. In: NIPS Deep Learning Workshop (2013)","key":"4_CR10"},{"issue":"7540","key":"4_CR11","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"unstructured":"Nair, A., Srinivasan, P., Blackwell, S., Alcicek, C., Fearon, R., De Maria, A., Panneershelvam, V., Suleyman, M., Beattie, C., Petersen, S., et al.: Massively parallel methods for deep reinforcement learning. In: ICML Deep Learning Workshop (2015)","key":"4_CR12"},{"unstructured":"Nair, V., Hinton, G.E.: Rectified linear units improve restricted Boltzmann machines. In: Proceedings of the 27th International Conference on Machine Learning (ICML-2010), pp. 807\u2013814 (2010)","key":"4_CR13"},{"unstructured":"Osband, I., Blundell, C., Pritzel, A., Van Roy, B.: Deep exploration via bootstrapped DQN. In: Lee, D.D., Sugiyama, M., Luxburg, U.V., Guyon, I., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 29, pp. 4026\u20134034. Curran Associates, Inc. (2016)","key":"4_CR14"},{"unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: Proceedings of the 32nd International Conference on Machine Learning (ICML 2015), pp. 1889\u20131897 (2015)","key":"4_CR15"},{"issue":"7587","key":"4_CR16","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., Guez, A., Sifre, L., van den Driessche, G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., Lanctot, M., Dieleman, S., Grewe, D., Nham, J., Kalchbrenner, N., Sutskever, I., Lillicrap, T., Leach, M., Kavukcuoglu, K., Graepel, T., Hassabis, D.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Proceedings of the 7th International Conference on Machine Learning, pp. 216\u2013224 (1990)","key":"4_CR17","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"issue":"4","key":"4_CR18","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1145\/122344.122377","volume":"2","author":"RS Sutton","year":"1991","unstructured":"Sutton, R.S.: Dyna, an integrated architecture for learning, planning, and reacting. SIGART Bull. 2(4), 160\u2013163 (1991)","journal-title":"SIGART Bull."},{"unstructured":"Tang, H., Houthooft, R., Foote, D., Stooke, A., Chen, X., Duan, Y., Schulman, J., De Turck, F., Abbeel, P.: #Exploration: a study of count-based exploration for deep reinforcement learning. In: NIPS Deep Reinforcement Learning Workshop (2016)","key":"4_CR19"},{"unstructured":"Tieleman, T., Hinton, G.: Lecture 6e RMSprop: divide the gradient by a running average of its recent magnitude. Coursera: Neural Networks for Machine Learning (2012)","key":"4_CR20"},{"unstructured":"Tokui, S., Oono, K., Hido, S., Clayton, J.: Chainer: a next-generation open source framework for deep learning. In: Proceedings of Workshop on Machine Learning Systems (LearningSys) in The Twenty-Ninth Annual Conference on Neural Information Processing Systems (NIPS) (2015)","key":"4_CR21"},{"doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double Q-learning. In: AAAI Conference on Artificial Intelligence, pp. 2094\u20132100 (2016)","key":"4_CR22","DOI":"10.1609\/aaai.v30i1.10295"},{"unstructured":"Wang, Z., de Freitas, N., Lanctot, M.: Dueling network architectures for deep reinforcement learning. CoRR abs\/1511.06581 (2015). http:\/\/arxiv.org\/abs\/1511.06581","key":"4_CR23"},{"unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. thesis, University of Cambridge, England (1989)","key":"4_CR24"},{"key":"4_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1007\/978-3-319-10590-1_53","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MD Zeiler","year":"2014","unstructured":"Zeiler, M.D., Fergus, R.: Visualizing and understanding convolutional networks. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8689, pp. 818\u2013833. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10590-1_53"}],"container-title":["Communications in Computer and Information Science","Computer Games"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-75931-9_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T09:05:33Z","timestamp":1660467933000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-75931-9_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319759302","9783319759319"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-75931-9_4","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"15 February 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}