{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:51:19Z","timestamp":1740099079551,"version":"3.37.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319976273"},{"type":"electronic","value":"9783319976280"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-97628-0_17","type":"book-chapter","created":{"date-parts":[[2018,7,25]],"date-time":"2018-07-25T14:24:03Z","timestamp":1532528643000},"page":"201-211","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Deep Reinforcement Learning with Risk-Seeking Exploration"],"prefix":"10.1007","author":[{"given":"Nat","family":"Dilokthanakul","sequence":"first","affiliation":[]},{"given":"Murray","family":"Shanahan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,7,26]]},"reference":[{"key":"17_CR1","first-page":"397","volume":"3","author":"P Auer","year":"2002","unstructured":"Auer, P.: Using confidence bounds for exploitation-exploration trade-offs. J. Mach. Learn. Res. 3, 397\u2013422 (2002)","journal-title":"J. Mach. Learn. Res."},{"key":"17_CR2","unstructured":"Bellemare, M., Srinivasan, S., Ostrovski, G., Schaul, T., Saxton, D., Munos, R.: Unifying count-based exploration and intrinsic motivation. In: Advances in Neural Information Processing Systems, pp. 1471\u20131479 (2016)"},{"key":"17_CR3","unstructured":"Bellemare, M.G., Dabney, W., Munos, R.: A distributional perspective on reinforcement learning. In: International Conference on Machine Learning, pp. 449\u2013458 (2017)"},{"key":"17_CR4","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control. Athena Scientific, 2nd edn. (2000)"},{"key":"17_CR5","unstructured":"Blundell, C., Cornebise, J., Kavukcuoglu, K., Wierstra, D.: Weight uncertainty in neural network. In: International Conference on Machine Learning, pp. 1613\u20131622 (2015)"},{"key":"17_CR6","first-page":"213","volume":"3","author":"RI Brafman","year":"2002","unstructured":"Brafman, R.I., Tennenholtz, M.: R-MAX-A general polynomial time algorithm for near-optimal reinforcement learning. J. Mach. Learn. Res. 3, 213\u2013231 (2002)","journal-title":"J. Mach. Learn. Res."},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Dabney, W., Rowland, M., Bellemare, M.G., Munos, R.: Distributional reinforcement learning with quantile regression. arXiv preprint arXiv:1710.10044 (2017)","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"17_CR8","unstructured":"Deisenroth, M., Rasmussen, C.E.: Pilco: A model-based and data-efficient approach to policy search. In: Proceedings of the 28th International Conference on machine learning (ICML 2011), pp. 465\u2013472 (2011)"},{"key":"17_CR9","unstructured":"Engel, Y., Mannor, S., Meir, R.: Bayes meets bellman: The Gaussian process approach to temporal difference learning. In: Proceedings of the 20th International Conference on Machine Learning (ICML 2003), pp. 154\u2013161 (2003)"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Ghavamzadeh, M., Mannor, S., Pineau, J., Tamar, A., et\u00a0al.: Bayesian reinforcement learning: a survey. Found. Trends Mach. Learn. 8(5-6), 359\u2013483 (2015)","DOI":"10.1561\/2200000049"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Hessel, M., Modayil, J., Van Hasselt, H., Schaul, T., Ostrovski, G., Dabney, W., Horgan, D., Piot, B., Azar, M., Silver, D.: Rainbow: combining improvements in deep reinforcement learning. arXiv preprint arXiv:1710.02298 (2017)","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"17_CR12","unstructured":"Houthooft, R., Chen, X., Duan, Y., Schulman, J., De Turck, F., Abbeel, P.: Vime: Variational information maximizing exploration. In: Advances in Neural Information Processing Systems, pp. 1109\u20131117 (2016)"},{"key":"17_CR13","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937 (2016)"},{"issue":"7540","key":"17_CR14","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"17_CR15","unstructured":"Moerland, T.M., Broekens, J., Jonker, C.M.: Efficient exploration with double uncertain value networks. In: Deep Reinforcement Learning Symposium @ Conference on Neural Information Processing Systems (2017)"},{"key":"17_CR16","unstructured":"O\u2019Donoghue, B., Osband, I., Munos, R., Mnih, V.: The uncertainty bellman equation and exploration. arXiv preprint arXiv:1709.05380 (2017)"},{"key":"17_CR17","unstructured":"Pawlowski, N., Rajchl, M., Glocker, B.: Implicit weight uncertainty in neural networks. In: Bayesian Deep Learning Workshop at NIPS 2017 (2017)"},{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J.: Curious model-building control systems. In: 1991 IEEE International Joint Conference on Neural Networks, pp. 1458\u20131463. IEEE (1991)","DOI":"10.1109\/IJCNN.1991.170605"},{"key":"17_CR19","unstructured":"Stadie, B.C., Levine, S., Abbeel, P.: Incentivizing exploration in reinforcement learning with deep predictive models. arXiv preprint arXiv:1507.00814 (2015)"},{"key":"17_CR20","doi-asserted-by":"crossref","unstructured":"Strehl, A.L., Li, L., Wiewiora, E., Langford, J., Littman, M.L.: PAC model-free reinforcement learning. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 881\u2013888. ACM (2006)","DOI":"10.1145\/1143844.1143955"},{"key":"17_CR21","volume-title":"Reinforcement Learning","author":"R Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning. MIT Press, Cambridge (1998)"},{"issue":"13","key":"17_CR22","first-page":"1","volume":"17","author":"A Tamar","year":"2016","unstructured":"Tamar, A., Di Castro, D., Mannor, S.: Learning the variance of the reward-to-go. J. Mach. Learn. Res. 17(13), 1\u201336 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"17_CR23","unstructured":"Tang, H., Houthooft, R., Foote, D., Stooke, A., Chen, O.X., Duan, Y., Schulman, J., DeTurck, F., Abbeel, P.: # exploration: a study of count-based exploration for deep reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 2750\u20132759 (2017)"},{"key":"17_CR24","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning (2016)","DOI":"10.1609\/aaai.v30i1.10295"}],"container-title":["Lecture Notes in Computer Science","From Animals to Animats 15"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-97628-0_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,28]],"date-time":"2022-08-28T05:12:25Z","timestamp":1661663545000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-97628-0_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319976273","9783319976280"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-97628-0_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}