{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T16:28:07Z","timestamp":1729614487959,"version":"3.28.0"},"reference-count":86,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011,4]]},"DOI":"10.1109\/adprl.2011.5967379","type":"proceedings-article","created":{"date-parts":[[2011,8,4]],"date-time":"2011-08-04T01:40:00Z","timestamp":1312422000000},"page":"17-24","source":"Crossref","is-referenced-by-count":3,"title":["On learning with imperfect representations"],"prefix":"10.1109","author":[{"given":"Shivaram","family":"Kalyanakrishnan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Stone","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref73","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in Neural Information Processing Systems 12"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017936530646"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"journal-title":"On-line Q-learning using connectionist systems","year":"1994","author":"rummery","key":"ref70"},{"journal-title":"The CMA evolution strategy A tutorial","year":"2009","author":"hansen","key":"ref76"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-009-9100-2"},{"key":"ref74","first-page":"1531","article-title":"A natural policy gradient","author":"kakade","year":"2001","journal-title":"Advances in Neural Information Processing Systems 14"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50031-9"},{"journal-title":"Efficient evolution of neural networks through complex-ification","year":"2004","author":"stanley","key":"ref75"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.28.1.1"},{"key":"ref78","first-page":"749","article-title":"An empirical analysis of value function-based and policy search reinforcement learning","author":"kalyanakrishnan","year":"0","journal-title":"Proc AAMAS 2009 IFAAMAS"},{"key":"ref79","first-page":"149","article-title":"Similarities and differences between policy gradient methods and evolution strategies","author":"heidrich-meisner","year":"0","journal-title":"Proc ESANN 2008 D-side Publication"},{"key":"ref33","first-page":"1595","article-title":"A convergent form of approximate policy iteration","author":"perkins","year":"2003","journal-title":"Advances in Neural Information Processing Systems 15"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"ref31","first-page":"335","article-title":"Anytime point-based approximations for large POMDPs","volume":"27","author":"pineau","year":"2006","journal-title":"J Art Int Res"},{"key":"ref30","first-page":"1023","article-title":"Acting optimally in partially observable stochastic domains","author":"cassandra","year":"0","journal-title":"Proc AAAI 1994 AAAI Press"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1016\/0022-247X(65)90154-X","article-title":"Optimal control of Markov Processes with incomplete state information","volume":"10","author":"\u00e5str\u00f6m","year":"1965","journal-title":"J Math Analysis Applic"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/BF00058926"},{"key":"ref35","first-page":"877","article-title":"Evolutionary function approximation for reinforcement learning","volume":"7","author":"whiteson","year":"2006","journal-title":"J Mach Learn Res"},{"key":"ref34","first-page":"719","article-title":"Toward off-policy learning control with function approximation","author":"maei","year":"0","journal-title":"Proc ICML 2010 Omnipress"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"ref62","first-page":"369","article-title":"Generalization in reinforcement learning: Safely approximating the value function","volume":"7","author":"boyan","year":"1995","journal-title":"Advances in neural information processing systems"},{"key":"ref61","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref63","first-page":"650","article-title":"Batch reinforcement learning in a complex domain","author":"kalyanakrishnan","year":"0","journal-title":"Proc AAMAS 2007 IFAAMAS"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993308"},{"key":"ref27","first-page":"39","article-title":"Self-optimizing memory controllers: A reinforcement learning approach","author":"ipek","year":"0","journal-title":"Proc ISCA 2008 IEEE Press"},{"key":"ref65","article-title":"Tight performance bounds on greedy policies based on imperfect value functions","author":"williams","year":"0","journal-title":"Proc Tenth Yale Workshop on Adaptive and Learning Systems"},{"key":"ref66","first-page":"319","article-title":"Infinite-horizon policy-gradient estimation","volume":"15","author":"baxter","year":"2001","journal-title":"J Art Int Res"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2008.924423"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177704711"},{"key":"ref68","article-title":"Inductive biases in a reinforcement learner","author":"cobb","year":"1992","journal-title":"Navy Center for Applied Research in Artificial Intelligence Washington DC USA Tech Rep AIC-92&#x2013;013"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-58484-6_300"},{"journal-title":"Dynamic Programming","year":"1957","author":"bellman","key":"ref2"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389841"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143929"},{"key":"ref21","first-page":"3003","article-title":"Quadruped robot obstacle negotiation via reinforcement learning","author":"lee","year":"0","journal-title":"Proc ICRA 2006"},{"key":"ref24","first-page":"659","article-title":"Learning to play using low-complexity rule-based policies: Illustrations through Ms. Pac-Man","volume":"30","author":"szita","year":"2007","journal-title":"J Art Int Res"},{"key":"ref23","first-page":"1053","article-title":"Reinforcement learning of local shape in the game of Go","author":"silver","year":"0","journal-title":"in Proc IJCAI 2007"},{"key":"ref26","first-page":"1671","article-title":"Adaptive treatment of epilepsy via batch-mode reinforcement learning","author":"guez","year":"0","journal-title":"Proc AAAI 2008 AAAI Press"},{"key":"ref25","first-page":"259","article-title":"Simulation-based approach to General Game Playing","author":"finnsson","year":"0","journal-title":"Proc AAAI 2008 AAAI Press"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78671-9_19"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04180-8_60"},{"key":"ref59","first-page":"528","article-title":"Dyna-style planning with linear function approximation and prioritized sweeping","author":"sutton","year":"0","journal-title":"Proc UAI 2008 AAAI Press"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390240"},{"key":"ref57","first-page":"255","article-title":"Issues in using function approximation for reinforcement learning","author":"thrun","year":"0","journal-title":"Proc 1993 Connectionist Models Summer School Lawrence Erlbaum"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref55","first-page":"417","article-title":"Off-policy temporal difference learning with function approximation","author":"precup","year":"0","journal-title":"Proc ICML 2001 Morgan Kauffman"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993978"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref10","first-page":"1114","article-title":"A reinforcement learning approach to job-shop scheduling","author":"zhang","year":"0","journal-title":"Proc IJCAI 1995 Morgan Kauffman"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref11"},{"journal-title":"Reinforcement Learning with Selective Perception and Hidden State","year":"1996","author":"mccallum","key":"ref40"},{"key":"ref12","first-page":"3","article-title":"Building controllers for Tetris","volume":"32","author":"thierry","year":"2010","journal-title":"Int Comp Games Assoc J"},{"key":"ref13","first-page":"1017","article-title":"Improving elevator performance using reinforcement learning","volume":"8","author":"crites","year":"1996","journal-title":"Advances in neural information processing systems"},{"key":"ref14","first-page":"1038","article-title":"Generalization in reinforcement learning: Successful examples using sparse coarse coding","volume":"8","author":"sutton","year":"1996","journal-title":"Advances in neural information processing systems"},{"key":"ref15","first-page":"974","article-title":"Reinforcement learning for dynamic channel allocation in cellular telephone systems","volume":"9","author":"singh","year":"1997","journal-title":"Advances in neural information processing systems"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(00)00081-3"},{"key":"ref16","first-page":"2084","article-title":"Active guidance for a finless rocket using neuroevolution","author":"gomez","year":"0","journal-title":"Proc GECCO 2003 Springer"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1023\/A:1019956318069"},{"key":"ref17","first-page":"611","article-title":"Machine learning for fast quadrupedal locomotion","author":"kohl","year":"0","journal-title":"Proc AAAI 2004 AAAI Press"},{"key":"ref84","first-page":"968","article-title":"Gradient descent for general reinforcement learning","author":"baird","year":"1999","journal-title":"Advances in Neural Information Processing Systems 11"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389903"},{"key":"ref83","first-page":"565","article-title":"SATzilla: Portfolio-based algorithm selection for SAT","volume":"32","author":"xu","year":"2008","journal-title":"J Art Int Res"},{"key":"ref19","article-title":"Autonomous helicopter flight via reinforcement learning","volume":"16","author":"ng","year":"2004","journal-title":"Advances in neural information processing systems"},{"key":"ref80","first-page":"937","article-title":"Accelerated neural evolution through cooperatively coevolved synapses","volume":"9","author":"gomez","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007678930559"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref6","first-page":"213","article-title":"R-MAX - a general polynomial time algorithm for near-optimal reinforcement learning","volume":"3","author":"brafman","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-005-5732-z"},{"key":"ref8","article-title":"Symposium on applications of reinforcement learning: Final report for NSF Grant IIS-9810208","author":"langley","year":"1998","journal-title":"Institute for the Study of Learning and Expertise Tech Rep"},{"key":"ref86","first-page":"227","article-title":"Coordinated reinforcement learning","author":"guestrin","year":"0","journal-title":"Proc ICML 2002 Morgan Kauffman"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143955"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017992615625"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992697"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"journal-title":"Hierarchical control and learning for Markov decision processes","year":"1998","author":"parr","key":"ref45"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1561\/2200000003"},{"key":"ref42","first-page":"194","article-title":"Evolutionary search, stochastic policies with memory, and reinforcement learning with hidden state","author":"glickman","year":"0","journal-title":"Proc ICML 2001 Morgan Kauffman"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143896"},{"key":"ref44","doi-asserted-by":"crossref","first-page":"321","DOI":"10.7551\/mitpress\/3119.003.0050","article-title":"Learning hierarchical control structures for multiple tasks and changing environments","author":"digney","year":"1998","journal-title":"From Animals to Animats 5"},{"key":"ref43","first-page":"227","article-title":"Hierarchical reinforcement learning with the MAXQ value function decomposition","volume":"13","author":"dietterich","year":"2000","journal-title":"J Art Int Res"}],"event":{"name":"2011 Ieee Symposium On Adaptive Dynamic Programming And Reinforcement Learning","start":{"date-parts":[[2011,4,11]]},"location":"Paris, France","end":{"date-parts":[[2011,4,15]]}},"container-title":["2011 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5958170\/5967347\/05967379.pdf?arnumber=5967379","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,9]],"date-time":"2024-04-09T14:54:59Z","timestamp":1712674499000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5967379\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,4]]},"references-count":86,"URL":"https:\/\/doi.org\/10.1109\/adprl.2011.5967379","relation":{},"subject":[],"published":{"date-parts":[[2011,4]]}}}