{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T02:39:21Z","timestamp":1725935961997},"publisher-location":"Cham","reference-count":14,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319716480"},{"type":"electronic","value":"9783319716497"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-71649-7_14","type":"book-chapter","created":{"date-parts":[[2017,12,21]],"date-time":"2017-12-21T18:46:24Z","timestamp":1513881984000},"page":"165-175","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploration Bonuses Based on Upper Confidence Bounds for Sparse Reward Games"],"prefix":"10.1007","author":[{"given":"Naoki","family":"Mizukami","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Suzuki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hirotaka","family":"Kameko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yoshimasa","family":"Tsuruoka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,12,22]]},"reference":[{"key":"14_CR1","unstructured":"Bellemare, M., Srinivasan, S., Ostrovski, G., Schaul, T., Saxton, D., Munos, R.: Unifying count-based exploration and intrinsic motivation. In: Advances in Neural Information Processing Systems, NIPS, vol. 29, pp. 1471\u20131479 (2016)"},{"issue":"7540","key":"14_CR2","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., Petersen, S., Beattie, C., Sadik, A., Antonoglou, I., King, H., Kumaran, D., Wierstra, D., Legg, S., Hassabis, D.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"14_CR3","unstructured":"Tang, H., Houthooft, R., Foote, D., Stooke, A., Chen, X., Duan, Y., Schulman, J., De Turck, F., Abbeel, P.: # exploration: a study of count-based exploration for deep reinforcement learning. arXiv preprint arXiv:1611.04717 (2016)"},{"key":"14_CR4","doi-asserted-by":"crossref","first-page":"1309","DOI":"10.1016\/j.jcss.2007.08.009","volume":"74","author":"AL Strehl","year":"2008","unstructured":"Strehl, A.L., Littman, M.L.: An analysis of model-based interval estimation for Markov decision processes. J. Comput. Syst. Sci. 74, 1309\u20131331 (2008)","journal-title":"J. Comput. Syst. Sci."},{"key":"14_CR5","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1016\/0196-8858(85)90002-8","volume":"6","author":"TL Lai","year":"1985","unstructured":"Lai, T.L., Robbins, H.: Asymptotically efficient adaptive allocation rules. Adv. Appl. Math. 6, 4\u201322 (1985)","journal-title":"Adv. Appl. Math."},{"key":"14_CR6","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T.P., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33rd International Conference on Machine Learning, JMLR (2016)"},{"key":"14_CR7","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare, M.G., Naddaf, Y., Veness, J., Bowling, M.: The arcade learning environment: an evaluation platform for general agents. J. Artif. Intell. Res. 47, 253\u2013279 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Andoni, A., Indyk, P.: Near-optimal hashing algorithms for approximate nearest neighbor in high dimensions. In: Proceedings of the 47th Annual IEEE Symposium on Foundations of Computer Science, pp. 459\u2013468. IEEE (2006)","DOI":"10.1109\/FOCS.2006.49"},{"key":"14_CR9","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., Riedmiller, M.: Playing atari with deep reinforcement learning. In: NIPS Deep Learning Workshop, NIPS (2013)"},{"key":"14_CR10","unstructured":"Nair, V., Hinton, G.E.: Rectified linear units improve restricted Boltzmann machines. In: Proceedings of the 27th International Conference on Machine Learning, Omnipress, pp. 807\u2013814 (2010)"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double Q-learning. In: Proceedings of the 30th AAAI Conference on Artificial Intelligence, AAAI, pp. 2094\u20132100 (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"14_CR12","unstructured":"Nair, A., Srinivasan, P., Blackwell, S., Alcicek, C., Fearon, R., De Maria, A., Panneershelvam, V., Suleyman, M., Beattie, C., Petersen, S., Legg, S., Mnih, V., Kavukcuoglu, K., Silver, D.: Massively parallel methods for deep reinforcement learning. In: ICML Deep Learning Workshop (2015)"},{"key":"14_CR13","unstructured":"Osband, I., Blundell, C., Pritzel, A., Van Roy, B.: Deep exploration via bootstrapped DQN. In: Advances in Neural Information Processing Systems, NIPS 29, pp. 4026\u20134034 (2016)"},{"key":"14_CR14","unstructured":"Wang, Z., Schaul, T., Hessel, M., van Hasselt, H., Lanctot, M., de Freitas, N.: Dueling network architectures for deep reinforcement learning. In: Proceedings of the 33rd International Conference on Machine Learning, JLMR, pp. 1995\u20132003 (2016)"}],"container-title":["Lecture Notes in Computer Science","Advances in Computer Games"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-71649-7_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,11]],"date-time":"2022-08-11T00:29:12Z","timestamp":1660177752000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-71649-7_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319716480","9783319716497"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-71649-7_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}