{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,30]],"date-time":"2022-03-30T21:00:55Z","timestamp":1648674055304},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2015,12,1]],"date-time":"2015-12-01T00:00:00Z","timestamp":1448928000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"name":"Research Establishment","award":["204636\/3255"],"award-info":[{"award-number":["204636\/3255"]}]},{"name":"University Research Fund from Victoria University of Wellington","award":["206971\/3468"],"award-info":[{"award-number":["206971\/3468"]}]},{"name":"Marsden Fund of New Zealand Government","award":["VUW1209"],"award-info":[{"award-number":["VUW1209"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Evol. Computat."],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1109\/tevc.2015.2415464","type":"journal-article","created":{"date-parts":[[2015,3,23]],"date-time":"2015-03-23T18:41:59Z","timestamp":1427136119000},"page":"885-902","source":"Crossref","is-referenced-by-count":6,"title":["Using Learning Classifier Systems to Learn Stochastic Decision Policies"],"prefix":"10.1109","volume":"19","author":[{"given":"Gang","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Colin I. J.","family":"Douch","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengjie","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/11903697_7"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/1569901.1570075"},{"key":"ref33","first-page":"36","article-title":"Evolving neural networks for classifier prediction with XCSF","author":"loiacono","year":"2006","journal-title":"Proc Workshop Evol Comput (ECAI)"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1162\/106365600568239"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CEC.2005.1554945"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1276958.1277322"},{"key":"ref37","first-page":"1","article-title":"Reinforcement learning for humanoid robotics","author":"peters","year":"2003","journal-title":"Proc 3rd IEEE-RAS Int Conf Human Robots"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2006.282564"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.4249\/scholarpedia.3698"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s12065-007-0003-3"},{"key":"ref27","first-page":"643","article-title":"An analysis of the memory mechanism of XCSM","author":"lanzi","year":"1998","journal-title":"Proc 3rd Conf Genetic Program"},{"key":"ref29","first-page":"353","article-title":"An extension to the XCS classifier system for stochastic environments","author":"lanzi","year":"1999","journal-title":"Proc Genet Evol Comput Conf"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/978-3-540-88138-4_1","article-title":"Learning classifier systems: Looking back and glimpsing ahead","author":"bacardit","year":"2008","journal-title":"Learning Classifier Systems"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1162\/089976698300017746"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref22","author":"holland","year":"1975","journal-title":"Adaptation in Natural and Artificial Systems"},{"key":"ref21","first-page":"1455","article-title":"Accuracy based fuzzy Q-learning for robot behaviours","volume":"3","author":"gu","year":"2004","journal-title":"Proc IEEE Int Conf Fuzzy Syst"},{"key":"ref24","first-page":"1523","article-title":"Learning attractor landscapes for learning motor primitives","author":"ijspeert","year":"2003","journal-title":"Proc Adv Neural Inf Process Syst 15 (NIPS)"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1080\/0907676X.2001.9961411"},{"key":"ref26","first-page":"2619","article-title":"Policy gradient reinforcement learning for fast quadrupedal locomotion","author":"kohl","year":"2003","journal-title":"Proc IEEE Int Conf Robot Autom (ICRA)"},{"key":"ref25","first-page":"1531","article-title":"A natural policy gradient","author":"kakade","year":"2001","journal-title":"Proc Adv Neural Inf Process Syst 14 (NIPS)"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1162\/evco.1995.3.2.149"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2005.850265"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2003.818194"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2001576.2001740"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2007.903551"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/2001576.2001744"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1007\/3-540-44640-0_15","article-title":"An algorithmic description of XCS","author":"butz","year":"2001","journal-title":"Advances in Learning Classifier Systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-48774-3_40"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2007.900904"},{"key":"ref17","first-page":"44","article-title":"Reinforcement learning by an accuracy-based fuzzy classifier system with real-valued output","author":"casillas","year":"2008","journal-title":"Proceedings of I Workshop on Genetic Fuzzy Systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-004-4344-3"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-12643-2_37"},{"key":"ref4","article-title":"Policy gradient vs. value function approximation: A reinforcement learning shootout","author":"beitelspacher","year":"2006"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-39925-4_2"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.07.008"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1007\/3-540-48104-4_8","article-title":"A comparative study of two learning classifier systems on data mining","author":"bernad\u00f3","year":"2002","journal-title":"Advances in Learning Classifier Systems"},{"key":"ref8","first-page":"905","article-title":"Accuracy-based neuro and neuro-fuzzy classifier systems","author":"bull","year":"2002","journal-title":"Proc Genet Evol Comput Conf (GECCO)"},{"key":"ref7","article-title":"Towards machine learning control of chemical computers","author":"budd","year":"2006","journal-title":"From Utopian to Genuine Unconventional Computers"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1068009.1068320"},{"key":"ref46","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Proc Adv Neural Inf Process Syst 12 (NIPS)"},{"key":"ref45","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref48","first-page":"195","article-title":"The application of Michigan-style learning classifier systems to address genetic heterogeneity and epistasisin association studies","author":"urbanowicz","year":"2010","journal-title":"Proc 12th Annu Conf Genet Evol Comput"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1155\/2009\/736398"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04617-9_25"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CEC.2005.1554954"},{"key":"ref43","article-title":"Learning classifier systems for multi-objective robot control","author":"studley","year":"2006"}],"container-title":["IEEE Transactions on Evolutionary Computation"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/4235\/7337505\/07065281.pdf?arnumber=7065281","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:46:47Z","timestamp":1642006007000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7065281\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,12]]},"references-count":50,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tevc.2015.2415464","relation":{},"ISSN":["1089-778X","1089-778X","1941-0026"],"issn-type":[{"value":"1089-778X","type":"print"},{"value":"1089-778X","type":"print"},{"value":"1941-0026","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,12]]}}}