{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T16:12:21Z","timestamp":1743005541081,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031579622"},{"type":"electronic","value":"9783031579639"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-57963-9_3","type":"book-chapter","created":{"date-parts":[[2024,4,23]],"date-time":"2024-04-23T07:02:12Z","timestamp":1713855732000},"page":"28-43","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Bag of\u00a0Policies for\u00a0Distributional Deep Exploration"],"prefix":"10.1007","author":[{"given":"Asen","family":"Nachkov","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6679-5948","authenticated-orcid":false,"given":"Luchen","family":"Li","sequence":"additional","affiliation":[]},{"given":"Giulia","family":"Luise","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9824-9378","authenticated-orcid":false,"given":"Filippo","family":"Valdettaro","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0813-7207","authenticated-orcid":false,"given":"A. Aldo","family":"Faisal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,24]]},"reference":[{"key":"3_CR1","unstructured":"Barth-Maron, G., et al.: Distributed distributional deterministic policy gradients. In: Proceedings of the 6th International Conference on Learning Representations (ICLR) (2018)"},{"key":"3_CR2","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare, M.G., Naddaf, Y., Veness, J., Bowling, M.: The arcade learning environment: an evaluation platform for general agents. J. Artif. Intell. Res. 47, 253\u2013279 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"3_CR3","unstructured":"Bellemare, M.G., Dabney, W., Munos, R.: A distributional perspective on reinforcement learning. In: Proceedings of the 34th International Conference on Machine Learning, vol. 70, pp. 449\u2013458 (2017)"},{"key":"3_CR4","unstructured":"Chen, R.Y., Sidor, S., Abbeel, P., Schulman, J.: UCB exploration via q-ensembles (2017)"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Choi, Y., Lee, K., Oh, S.: Distributional deep reinforcement learning with a mixture of Gaussians. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 9791\u20139797 (2019)","DOI":"10.1109\/ICRA.2019.8793505"},{"key":"3_CR6","unstructured":"Dabney, W., Ostrovski, G., Silver, D., Munos, R.: Implicit quantile networks for distributional reinforcement learning. In: Proceedings of the 35th International Conference on Machine Learning, vol. 80, pp. 1096\u20131105 (2018a)"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Dabney, W., Rowland, M., Bellemare, M.G., Munos, R.: Distributional reinforcement learning with quantile regression. In: Proceedings of the AAAI Conference on Artificial Intelligence (2018b)","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"3_CR8","unstructured":"Doan, T., Mazoure, B., Lyle, C.: GAN q-learning (2018)"},{"key":"3_CR9","unstructured":"Donahue, J., Kr\u00e4henb\u00fchl, P., Darrell, T.: Adversarial feature learning. In: Proceedings of the 5th International Conference on Learning Representations (ICLR) (2017)"},{"key":"3_CR10","doi-asserted-by":"publisher","DOI":"10.1201\/9780429246593","volume-title":"An Introduction to the Bootstrap","author":"B Efron","year":"1994","unstructured":"Efron, B., Tibshirani, R.J.: An Introduction to the Bootstrap. CRC Press, Boca Raton (1994)"},{"key":"3_CR11","unstructured":"Espeholt, L., et al.:. IMPALA: scalable distributed deep-RL with importance weighted actor-learner architectures. In: Proceedings of the 35th International Conference on Machine Learning, vol. 80, pp. 1407\u20131416, Stockholmsm\u00e4ssan, Stockholm (2018)"},{"key":"3_CR12","unstructured":"Freirich, D., Shimkin, T., Meir, R., Tamar, A.: Distributional multivariate policy evaluation and exploration with the bellman GAN. In: Proceedings of the 36th International Conference on Machine Learning (ICML), Long Beach, CA, USA, vol. 97, pp. 1983\u20131992 (2019)"},{"key":"3_CR13","unstructured":"Kuznetsov, A., Shvechikov, P., Grishin, A., Vetrov, D.: Controlling overestimation bias with truncated mixture of continuous distributional quantile critics. In: Proceedings of the 37th International Conference on Machine Learning (2020)"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Li, L., Faisal, A.: Bayesian distributional policy gradients. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, no. 1, pp. 8429\u20138437 (2021)","DOI":"10.1609\/aaai.v35i10.17024"},{"key":"3_CR15","unstructured":"Liang, J., Makoviychuk, V., Handa, A., Chentanez, N., Macklin, M., Fox, D.: GPU-accelerated robotic simulation for distributed reinforcement learning. In: Conference on Robot Learning, pp. 270\u2013282. PMLR (2018)"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Lyle, C., Bellemare, M.G., Castro, P.S.: A comparative analysis of expected and distributional reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 4504\u20134511 (2019)","DOI":"10.1609\/aaai.v33i01.33014504"},{"key":"3_CR17","unstructured":"Martin, J., Lyskawinski, M., Li, X., Englot, B.: Stochastically dominant distributional reinforcement learning. In: Proceedings of the 37th International Conference on Machine Learning (2020)"},{"key":"3_CR18","unstructured":"Mavrin, B., et\u00a0al.: Distributional reinforcement learning for efficient exploration. In: Proceedings of the 36th International Conference on Machine Learning, vol. 97, pp. 4424\u20134434 (2019)"},{"key":"3_CR19","unstructured":"Mnih, V., et\u00a0al.: Asynchronous methods for deep reinforcement learning. In: Proceedings of The 33rd International Conference on Machine Learning, vol. 48, pp. 1928\u20131937 (2016)"},{"key":"3_CR20","unstructured":"O\u2019Donoghue, B., Osband, I., Munos, R., Mnih, V.: The uncertainty Bellman equation and exploration. In: Proceedings of the 35th International Conference on Machine Learning, vol. 80, pp. 3839\u20133848. Stockholmsm\u00e4ssan, Stockholm (2018)"},{"key":"3_CR21","first-page":"4026","volume":"29","author":"I Osband","year":"2016","unstructured":"Osband, I., Blundell, C., Pritzel, A., Van Roy, B.: Deep exploration via Bootstrapped DQN. Adv. Neural. Inf. Process. Syst. 29, 4026\u20134034 (2016)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3_CR22","first-page":"1","volume":"20","author":"I Osband","year":"2019","unstructured":"Osband, I., Van Roy, B., Russo, D.J., Wen, Z.: Deep exploration via randomized value functions. J. Mach. Learn. Res. 20, 1\u201362 (2019)","journal-title":"J. Mach. Learn. Res."},{"key":"3_CR23","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, Hoboken (1994)"},{"key":"3_CR24","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. In: Proceedings of the 4th International Conference on Learning Representations (ICLR) (2016)"},{"key":"3_CR25","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"3_CR26","unstructured":"Singh, R., Lee, K., Chen, Y.: Sample-based distributional policy gradient (2020)"},{"key":"3_CR27","first-page":"1057","volume":"12","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S.: Policy gradient methods for reinforcement learning with function approximation. Adv. Neural. Inf. Process. Syst. 12, 1057\u20131063 (1999)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3_CR28","doi-asserted-by":"crossref","unstructured":"Tang, Y., Agrawal, S.: Exploration by distributional reinforcement learning. In: Proceedings of the 27th International Joint Conference on Artificial Intelligence, pp. 2710\u20132716 (2020)","DOI":"10.24963\/ijcai.2018\/376"},{"issue":"2","key":"3_CR29","doi-asserted-by":"publisher","first-page":"450","DOI":"10.2307\/2371219","volume":"57","author":"WR Thompson","year":"1935","unstructured":"Thompson, W.R.: On the theory of apportionment. Am. J. Math. 57(2), 450\u2013456 (1935)","journal-title":"Am. J. Math."},{"issue":"4","key":"3_CR30","doi-asserted-by":"publisher","first-page":"930","DOI":"10.1109\/TSMCB.2008.920231","volume":"38","author":"MA Wiering","year":"2008","unstructured":"Wiering, M.A., van Hasselt, H.P.: Ensemble algorithms in reinforcement learning. IEEE Trans. Syst. Man Cybern. Part B 38(4), 930\u2013936 (2008)","journal-title":"IEEE Trans. Syst. Man Cybern. Part B"},{"key":"3_CR31","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1109\/TCYB.2019.2939174","volume":"51","author":"Z Zhang","year":"2019","unstructured":"Zhang, Z., Chen, J., Chen, Z., Li, W.: Asynchronous episodic deep deterministic policy gradient: toward continuous control in computationally complex environments. IEEE Trans. Cybern. 51, 604\u2013613 (2019)","journal-title":"IEEE Trans. Cybern."}],"container-title":["Lecture Notes in Computer Science","Epistemic Uncertainty in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-57963-9_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T21:28:17Z","timestamp":1731792497000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-57963-9_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031579622","9783031579639"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-57963-9_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"24 April 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Epi UAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Epistemic Uncertainty in Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pittsburgh, PA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"epiuai2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/sites.google.com\/view\/epi-workshop-uai-2023\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}