{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T18:55:24Z","timestamp":1772823324845,"version":"3.50.1"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1109\/tnnls.2021.3089425","type":"journal-article","created":{"date-parts":[[2021,6,29]],"date-time":"2021-06-29T19:38:21Z","timestamp":1624995501000},"page":"43-51","source":"Crossref","is-referenced-by-count":17,"title":["The Wisdom of the Crowd: Reliable Deep Reinforcement Learning Through Ensembles of Q-Functions"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9125-592X","authenticated-orcid":false,"given":"Daniel L.","family":"Elliott","sequence":"first","affiliation":[{"name":"Lindsay Corporation, Omaha, NE, USA"}]},{"given":"Charles","family":"Anderson","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Colorado State University, Fort Collins, CO, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/bf00058655"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref3","article-title":"Giraffe: Using deep reinforcement learning to play chess","author":"Lai","year":"2015"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref5","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mnih"},{"key":"ref6","article-title":"Prioritized experience replay","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Schaul"},{"key":"ref7","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Lillicrap"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/075450a0"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.2469\/faj.v43.n3.50"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.1050.0459"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.112.2.494"},{"key":"ref12","volume-title":"The Wisdom of Crowds","author":"Surowiecki","year":"2005"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/34.58871"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2016.05.001"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/BF00116037"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1006\/jcss.1997.1504"},{"key":"ref18","first-page":"1146","article-title":"A competitive strategy for function approximation in Q-learning","volume-title":"Proc. Int. Joint Conf. Artif. Intell.","author":"Agostini"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2010.66"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1162\/089976602753712972"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-013-9334-5"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.920231"},{"key":"ref23","article-title":"Massively parallel methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. Deep Learn. Workshop","author":"Nair"},{"key":"ref24","first-page":"2613","article-title":"Double Q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hasselt"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.4236\/ica.2016.74012"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1155\/2018\/2129393"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2015.7280824"},{"key":"ref30","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Kingma"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(05)80056-5"},{"key":"ref32","volume-title":"Openai Gym","year":"2016"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3089425"},{"issue":"1","key":"ref34","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","volume":"32","author":"Silver"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10007736\/09467501.pdf?arnumber=9467501","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T23:37:57Z","timestamp":1705016277000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9467501\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1]]},"references-count":34,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2021.3089425","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,1]]}}}