{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T17:26:50Z","timestamp":1729618010961,"version":"3.28.0"},"reference-count":19,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9206677","type":"proceedings-article","created":{"date-parts":[[2020,9,29]],"date-time":"2020-09-29T20:40:33Z","timestamp":1601412033000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["The Deep Quality-Value Family of Deep Reinforcement Learning Algorithms"],"prefix":"10.1109","author":[{"given":"Matthia","family":"Sabatelli","sequence":"first","affiliation":[]},{"given":"Gilles","family":"Louppe","sequence":"additional","affiliation":[]},{"given":"Pierre","family":"Geurts","sequence":"additional","affiliation":[]},{"given":"Marco A.","family":"Wiering","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"Towards characterizing divergence in deep Q-learning","year":"2019","author":"achiam","key":"ref10"},{"article-title":"Deep reinforcement learning and the deadly triad","year":"2018","author":"van hasselt","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref14","first-page":"17","article-title":"QV (lambda)-learning: A new on-policy reinforcement learning algorithm","author":"wiering","year":"2005","journal-title":"Proceedings of the 7th European Workshop on Reinforcement Learning"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2009.4927532"},{"article-title":"Noisy networks for exploration","year":"2017","author":"fortunato","key":"ref16"},{"key":"ref17","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11796","article-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"hessel","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref19","first-page":"1075","article-title":"Analysis of temporal-diffference learning with function approximation","author":"tsitsiklis","year":"1997","journal-title":"Advances in neural information processing systems"},{"key":"ref4","first-page":"2613","article-title":"Double Q-learning","author":"van hasselt","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref5","volume":"37","author":"rummery","year":"1994","journal-title":"On-line Q-learning using connectionist systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI.2016.7849837"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v30i1.10295","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"THIRTIETH AAAI Conference on Artificial Intelligence"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref1"},{"article-title":"Deep reinforcement learning: An overview","year":"2017","author":"li","key":"ref9"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2020,7,19]]},"location":"Glasgow, United Kingdom","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09206677.pdf?arnumber=9206677","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,21]],"date-time":"2022-11-21T01:20:48Z","timestamp":1668993648000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9206677\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9206677","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}