{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T18:17:48Z","timestamp":1776277068510,"version":"3.50.1"},"reference-count":23,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,4]]},"DOI":"10.1109\/adprl.2013.6614994","type":"proceedings-article","created":{"date-parts":[[2014,9,10]],"date-time":"2014-09-10T15:29:28Z","timestamp":1410362968000},"page":"93-99","source":"Crossref","is-referenced-by-count":18,"title":["Bias-corrected Q-learning to control max-operator bias in Q-learning"],"prefix":"10.1109","author":[{"given":"Donghun","family":"Lee","sequence":"first","affiliation":[]},{"given":"Boris","family":"Defourny","sequence":"additional","affiliation":[]},{"given":"Warren B.","family":"Powell","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"19","author":"lee","year":"2012","journal-title":"Bias-corrected Q-learning and Its Application in Electricity Storage Control"},{"key":"22","doi-asserted-by":"crossref","first-page":"106","DOI":"10.1145\/2093548.2093574","article-title":"The grand challenge of computer go: Monte carlo tree search and extensions","volume":"55","author":"gelly","year":"2012","journal-title":"Communications of the ACM"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"23","article-title":"Speedy Q-learning","author":"azar","year":"2011","journal-title":"Advances in neural information processing systems"},{"key":"18","article-title":"Double Q-learning","volume":"23","author":"van hasselt","year":"2010","journal-title":"Proc Neural Info Process Syst"},{"key":"15","first-page":"1","article-title":"Learning rates for Q-learning","volume":"5","author":"even-dar","year":"2003","journal-title":"J Machine Learning Research"},{"key":"16","article-title":"An intelligent battery controller using bias-corrected Q-learning","author":"lee","year":"2012","journal-title":"Proc 22nd AAAI Conf Artificial Intelligence"},{"key":"13","first-page":"1064","article-title":"The asymptotic convergence rate of Q-learning","volume":"10","author":"szepesvari","year":"1997","journal-title":"Neural Information Processing Systems"},{"key":"14","first-page":"996","article-title":"Finite-sample convergence rates for Q-learning and indirect algorithms","volume":"12","author":"kearns","year":"1999","journal-title":"Neural Information Processing Systems"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997331639"},{"key":"21","first-page":"282","article-title":"Bandit based monte-carlo planning","author":"kocsis","year":"2006","journal-title":"Proc 10th European Conf on Machine Learning"},{"key":"3","doi-asserted-by":"crossref","first-page":"496","DOI":"10.1109\/WCICA.2002.1022159","article-title":"An application in robocup combining q-learning with adversarial planning","volume":"1","author":"yao","year":"2002","journal-title":"Proc 4th world congress on intelligent control and automation"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1016\/S0921-8890(01)00114-2"},{"key":"1","author":"watkins","year":"1989","journal-title":"Learning from delayed rewards"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993306"},{"key":"7","article-title":"Learning to fight","author":"graepel","year":"2004","journal-title":"Microsoft Research"},{"key":"6","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1109\/ICMLC.2004.1380601","article-title":"Mobile robot navigation using neural Q-learning","volume":"1","author":"yang","year":"2004","journal-title":"Proc 2004 Int Conf Machine Learning and Cybernetics"},{"key":"5","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1023\/A:1015504423309","article-title":"Pricing in agent economies using multiagent Q-learning","volume":"5","author":"tesauro","year":"2002","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.3115\/1289189.1289246"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2007.893228"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2004.08.018"}],"event":{"name":"2013 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","location":"Singapore, Singapore","start":{"date-parts":[[2013,4,16]]},"end":{"date-parts":[[2013,4,19]]}},"container-title":["2013 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6596003\/6614979\/06614994.pdf?arnumber=6614994","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,22]],"date-time":"2017-06-22T16:14:05Z","timestamp":1498148045000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6614994\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,4]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/adprl.2013.6614994","relation":{},"subject":[],"published":{"date-parts":[[2013,4]]}}}