{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T18:44:05Z","timestamp":1777401845591,"version":"3.51.4"},"reference-count":100,"publisher":"Springer Science and Business Media LLC","issue":"4","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Evol. Intel."],"published-print":{"date-parts":[[2011,12]]},"DOI":"10.1007\/s12065-011-0066-z","type":"journal-article","created":{"date-parts":[[2011,10,29]],"date-time":"2011-10-29T10:26:41Z","timestamp":1319884001000},"page":"219-241","source":"Crossref","is-referenced-by-count":24,"title":["Neuroevolutionary reinforcement learning for generalized control of simulated helicopters"],"prefix":"10.1007","volume":"4","author":[{"given":"Rogier","family":"Koppejan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shimon","family":"Whiteson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2011,10,30]]},"reference":[{"issue":"13","key":"66_CR1","doi-asserted-by":"crossref","first-page":"1608","DOI":"10.1177\/0278364910371999","volume":"29","author":"P Abbeel","year":"2010","unstructured":"Abbeel P, Coates A, Ng A (2010) Autonomous helicopter aerobatics through apprenticeship learning. Int J Robotics Res 29(13):1608\u20131639","journal-title":"Int J Robotics Res"},{"key":"66_CR2","doi-asserted-by":"crossref","unstructured":"Abbeel P, Coates A, Quigley M, Ng AY (2007) An application of reinforcement learning to aerobatic helicopter flight. In: Advances in neural information processing systems 19. MIT Press, Cambridge, pp 1\u20138","DOI":"10.7551\/mitpress\/7503.003.0006"},{"key":"66_CR3","unstructured":"Abbeel P, Ganapathi V, Ng AY (2006) Learning vehicular dynamics with application to modeling helicopters. In: Proceedings of neural information processing systems (NIPS)"},{"key":"66_CR4","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng A (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the twenty-first international conference on machine learning","DOI":"10.1145\/1015330.1015430"},{"key":"66_CR5","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2005) Exploration and apprenticeship learning in reinforcement learning. In: Proceedings of the twenty-first international conference on machine learning","DOI":"10.1145\/1015330.1015430"},{"key":"66_CR6","doi-asserted-by":"crossref","unstructured":"Bagnell J, Schneider J (2001) Autonomous helicopter control using reinforcement learning policy search methods. In: Proceedings of the IEEE international conference on robotics and automation 2001","DOI":"10.1109\/ROBOT.2001.932842"},{"key":"66_CR7","doi-asserted-by":"crossref","unstructured":"Beielstein T, Markon S (2002) Threshold selection, hypothesis tests and DOE methods. In: 2002 congress on evolutionary computation, pp 777\u2013782","DOI":"10.1109\/CEC.2002.1007024"},{"key":"66_CR8","volume-title":"Dynamic programming","author":"RE Bellman","year":"1957","unstructured":"Bellman RE (1957) Dynamic programming. Princeton University Press, Princeton"},{"key":"66_CR9","first-page":"679","volume":"6","author":"RE Bellman","year":"1957","unstructured":"Bellman RE (1957) A Markov decision process. J Math Mech 6:679\u2013684","journal-title":"J Math Mech"},{"issue":"2","key":"66_CR10","first-page":"213","volume":"3","author":"R Brafman","year":"2003","unstructured":"Brafman R, Tennenholtz M, Schuurmans D (2003) R-max-A general polynomial time algorithm for near-optimal reinforcement learning. J Mach Learn Res 3(2):213\u2013231","journal-title":"J Mach Learn Res"},{"key":"66_CR11","doi-asserted-by":"crossref","unstructured":"Branke J, Schmidt C (2003) Selection in the presence of noise. In: Proceedings of the genetic and evolutionary computation conference (GECCO), pp 766\u2013777","DOI":"10.1007\/3-540-45105-6_91"},{"key":"66_CR12","doi-asserted-by":"crossref","unstructured":"Branke J, Schmidt C (2004) Sequential sampling in noisy environments. In: Proceedings of the international conference on parallel problem solving from nature (PPSN), pp 202\u2013211","DOI":"10.1007\/978-3-540-30217-9_21"},{"key":"66_CR13","doi-asserted-by":"crossref","unstructured":"Butz M, Goldberg D, Lanzi P (2005) Gradient descent methods in learning classifier systems: improving XCS performance in multistep problems. IEEE Trans Evolut Comput 9(5)","DOI":"10.1109\/TEVC.2005.850265"},{"key":"66_CR14","doi-asserted-by":"crossref","unstructured":"Cardamone L, Loiacono D, Lanzi P (2009) On-line neuroevolution applied to the open racing car simulator. In: Proceedings of the congress on evolutionary computation (CEC), pp 2622\u20132629","DOI":"10.1109\/CEC.2009.4983271"},{"issue":"3","key":"66_CR15","doi-asserted-by":"crossref","first-page":"176","DOI":"10.1109\/TCIAIG.2010.2052102","volume":"2","author":"L Cardamone","year":"2010","unstructured":"Cardamone L, Loiacono D, Lanzi PL (2010) Learning to drive in the open racing car simulator using online neuroevolution. Comput Intell AI in Games IEEE Trans 2(3):176\u2013190","journal-title":"Comput Intell AI in Games IEEE Trans"},{"issue":"5","key":"66_CR16","doi-asserted-by":"crossref","first-page":"1239","DOI":"10.1109\/72.788663","volume":"10","author":"S Chen","year":"2002","unstructured":"Chen S, Wu Y, Luk B (2002) Combined genetic algorithm optimization and regularized orthogonal least squares learning for radial basis function networks. Neural Netw IEEE Trans 10(5):1239\u20131243","journal-title":"Neural Netw IEEE Trans"},{"issue":"1","key":"66_CR17","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1007\/s10479-005-5724-z","volume":"134","author":"P De Boer","year":"2005","unstructured":"De Boer P, Kroese D, Mannor S, Rubinstein R (2005) A tutorial on the cross-entropy method. Ann Oper Res 134(1):19\u201367","journal-title":"Ann Oper Res"},{"key":"66_CR18","unstructured":"De Nardi R, Holland O (2006) Ultraswarm: a further step towards a flock of miniature helicopters. In: Proceedings of the SAB workshop on swarm robotics. Springer, Berlin, pp 116\u2013128"},{"key":"66_CR19","unstructured":"De Nardi R, Holland O (2008) Coevolutionary modelling of a miniature rotorcraft. In: IAS-10: intelligent autonomous systems conference, p 364"},{"issue":"3","key":"66_CR20","doi-asserted-by":"crossref","first-page":"396","DOI":"10.1109\/3477.499791","volume":"26","author":"D Floreano","year":"2002","unstructured":"Floreano D, Mondada F (2002) Evolution of homing navigation in a real mobile robot. IEEE Trans Syst Man Cybern B 26(3):396\u2013407","journal-title":"IEEE Trans Syst Man Cybern B"},{"issue":"3","key":"66_CR21","doi-asserted-by":"crossref","first-page":"311","DOI":"10.1023\/A:1012459627968","volume":"11","author":"D Floreano","year":"2001","unstructured":"Floreano D, Urzelai J (2001) Evolution of plastic control networks. Auton Robots 11(3):311\u2013317","journal-title":"Auton Robots"},{"key":"66_CR22","unstructured":"Gauci J, Stanley KO (2008) A case study on the critical role of geometric regularity in machine learning. In: Proceedings of the twenty-third AAAI conference on artificial intelligence"},{"issue":"7","key":"66_CR23","doi-asserted-by":"crossref","first-page":"1860","DOI":"10.1162\/neco.2010.06-09-1042","volume":"22","author":"J Gauci","year":"2010","unstructured":"Gauci J, Stanley KO (2010) Autonomous evolution of topographic regularities in artificial neural networks. Neural Comput 22(7):1860\u20131898","journal-title":"Neural Comput"},{"issue":"1","key":"66_CR24","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1613\/jair.1666","volume":"24","author":"P Geibel","year":"2005","unstructured":"Geibel P, Wysotzki F (2005) Risk-sensitive reinforcement learning applied to control under constraints. J Artif Intell Res 24(1):81\u2013108","journal-title":"J Artif Intell Res"},{"key":"66_CR25","unstructured":"Goldberg DE, Deb K, Clark JH (1991) Genetic algorithms, noise, and the sizing of populations. Complex Syst 6:333\u2013362"},{"issue":"3","key":"66_CR26","first-page":"265","volume":"5","author":"D Goldberg","year":"1991","unstructured":"Goldberg D, Rudnick M (1991) Genetic algorithms and the variance of fitness. Complex Syst 5(3):265\u2013278","journal-title":"Complex Syst"},{"key":"66_CR27","unstructured":"Goldberg DE (1989) Genetic algorithms in search, optimization, and machine learning, 1st edn. Addison-Wesley, Boston"},{"key":"66_CR28","doi-asserted-by":"crossref","unstructured":"Gomez F, Schmidhuber J, Miikkulainen R (2006) Efficient non-linear control through neuroevolution. In: Proceedings of the European conference on machine learning","DOI":"10.1007\/11871842_64"},{"key":"66_CR29","unstructured":"Gruau F, Whitley D, Pyeatt L (1996) A comparison between cellular encoding and direct encoding for genetic neural networks. In: Genetic programming 1996: Proceedings of the first annual conference, pp 81\u201389"},{"issue":"1","key":"66_CR30","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1162\/106365603321828970","volume":"11","author":"N Hansen","year":"2003","unstructured":"Hansen N, M\u00fcller S, Koumoutsakos P (2003) Reducing the time complexity of the derandomized evolution strategy with covariance matrix adaptation (CMA-ES). Evolut Comput 11(1):1\u201318","journal-title":"Evolut Comput"},{"issue":"3","key":"66_CR31","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1162\/evco.1999.7.3.231","volume":"7","author":"G Harik","year":"1999","unstructured":"Harik G, Cant\u00fa-Paz E, Goldberg D, Miller B (1999) The gambler\u2019s ruin problem, genetic algorithms, and the sizing of populations. Evolut Comput 7(3):231\u2013253","journal-title":"Evolut Comput"},{"key":"66_CR32","doi-asserted-by":"crossref","unstructured":"Heidrich-Meisner V, Igel C (2009) Hoeffding and Bernstein races for selecting policies in evolutionary direct policy search. In: Proceedings of the 26th annual international conference on machinelearning, ACM, pp 401\u2013408","DOI":"10.1145\/1553374.1553426"},{"key":"66_CR33","doi-asserted-by":"crossref","unstructured":"Hernandez-Diaz A, Coello C, Perez F, Caballero R, Molina J, Santana-Quintero L (2008) Seeding the initial population of a multi-objective evolutionary algorithm using gradient-based information. In: evolutionary computation, 2008. CEC 2008. (IEEE world congress on computational intelligence). IEEE congress on, pp 1617\u20131624","DOI":"10.1109\/CEC.2008.4631008"},{"key":"66_CR34","doi-asserted-by":"crossref","unstructured":"Hoffmann G, Huang H, Waslander S, Tomlin C (2007) Quadrotor helicopter flight dynamics and control: theory and experiment. In: Proceedings of the AIAA guidance, navigation, and control conference, pp 1\u201320","DOI":"10.2514\/6.2007-6461"},{"issue":"3","key":"66_CR35","doi-asserted-by":"crossref","first-page":"353","DOI":"10.1162\/artl.2006.12.3.353","volume":"12","author":"J Hurst","year":"2006","unstructured":"Hurst J, Bull L (2006) A neural learning classifier system with self-adaptive constructivism for mobile robot control. Artif Life 12(3):353\u2013380","journal-title":"Artif Life"},{"issue":"1","key":"66_CR36","first-page":"3","volume":"9","author":"Y Jin","year":"2005","unstructured":"Jin Y (2005) A comprehensive survey of fitness approximation in evolutionary computation. Soft Comput Fusion Found Methodol Appl 9(1):3\u201312","journal-title":"Soft Comput Fusion Found Methodol Appl"},{"issue":"5","key":"66_CR37","doi-asserted-by":"crossref","first-page":"481","DOI":"10.1109\/TEVC.2002.800884","volume":"6","author":"Y Jin","year":"2002","unstructured":"Jin Y, Olhofer M, Sendhoff B (2002) A framework for evolutionary optimization with approximate fitness functions. IEEE Trans Evolut Comput 6(5):481\u2013494","journal-title":"IEEE Trans Evolut Comput"},{"key":"66_CR38","doi-asserted-by":"crossref","unstructured":"Julstrom BA (1994) Seeding the population: improved performance in a genetic algorithm for the rectilinear steiner problem. In: Proceedings of the 1994 ACM symposium on applied computing, SAC \u201994, pp 222\u2013226","DOI":"10.1145\/326619.326728"},{"key":"66_CR39","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4168.001.0001","volume-title":"Learning in Embedded Systems","author":"LP Kaelbling","year":"1993","unstructured":"Kaelbling LP (1993) Learning in embedded systems. MIT Press, Cambridge"},{"key":"66_CR40","first-page":"237","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AP (1996) Reinforcement learning: A survey. J Art Intell Res 4:237\u2013285","journal-title":"J Art Intell Res"},{"key":"66_CR41","unstructured":"Kalyanakrishnan S, Stone P (2009) An empirical analysis of value function-based and policy search reinforcement learning. In: Proceedings of the eighth international joint conference on autonomous agents and multi\u2013agent systems (AAMAS 2009)"},{"key":"66_CR42","unstructured":"Kalyanakrishnan S, Stone P (2010) Efficient selection of multiple bandit arms: theory and practice. In: Proceedings of the twenty-seventh international conference on machine learning (ICML 2010) (to appear)"},{"key":"66_CR43","unstructured":"Kassahun Y, Sommer G (2005) Efficient reinforcement learning through evolutionary acquisition of neural topologies. In: 13th European symposium on artificial neural networks, Bruges, Belgium, pp 259\u2013266"},{"key":"66_CR44","unstructured":"Kearns M, Singh S (1998) Near-optimal reinforcement learning in polynomial time. In: Proceedings of the 15th international conference on machine learning. Morgan Kaufmann, San Francisco, pp 260\u2013268"},{"key":"66_CR45","doi-asserted-by":"crossref","unstructured":"Kernbach S, Meister E, Scholz O, Humza R, Liedke J, Ricotti L, Jemai J, Havlik J, Liu W (2009) Evolutionary robotics: the next-generation-platform for on-line and on-board artificial evolution. In: CEC\u201909: IEEE congress on evolutionary computation, pp 1079\u20131086","DOI":"10.1109\/CEC.2009.4983066"},{"key":"66_CR46","doi-asserted-by":"crossref","unstructured":"Koppejan R (2009) Neuroevolutionary reinforcement learning for generalized helicopter control. Master\u2019s thesis, Universiteit van Amsterdam","DOI":"10.1145\/1569901.1569922"},{"key":"66_CR47","doi-asserted-by":"crossref","unstructured":"Koppejan R, Whiteson S (2009) Neuroevolutionary reinforcement learning for generalized helicopter control. In: GECCO 2009: Proceedings of the genetic and evolutionary computation conference, pp 145\u2013152","DOI":"10.1145\/1569901.1569922"},{"key":"66_CR48","unstructured":"Lanzi PL, Colombetti M (1999) An extension to the XCS classifier system for stochastic environments. In: GECCO-99: Proceedings of the genetic and evolutionary computation conference, pp 353\u2013360"},{"key":"66_CR49","doi-asserted-by":"crossref","unstructured":"Lupashin S, Schollig A, Sherback M, D\u2019Andrea R (2010) A simple learning strategy for high-speed quadrocopter multi-flips. In: ICRA-10: IEEE international conference on robotics and automation, pp 1642\u20131648","DOI":"10.1109\/ROBOT.2010.5509452"},{"issue":"1\u20135","key":"66_CR50","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1023\/A:1006556606079","volume":"11","author":"O Maron","year":"1997","unstructured":"Maron O, Moore AW (1997) The racing algorithm: model selection for lazy learners. Artificial Intelligence Review 11(1\u20135):193\u2013225","journal-title":"Artif Intell Rev"},{"key":"66_CR51","doi-asserted-by":"crossref","unstructured":"Mart\u00edn HJA, de Lope J (2009) Learning autonomous helicopter flight with evolutionary reinforcement learning. In: 12th international conference on computer aided systems theory (EUROCAST), pp 75\u201382","DOI":"10.1007\/978-3-642-04772-5_11"},{"key":"66_CR52","doi-asserted-by":"crossref","unstructured":"Meyer J, Husbands P, Harvey I (1998) Evolutionary robotics: a survey of applications and problems. In: evolutionary robotics. Springer, pp 1\u201321","DOI":"10.1007\/3-540-64957-3_61"},{"issue":"2","key":"66_CR53","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1023\/A:1017940631555","volume":"49","author":"O Mihatsch","year":"2002","unstructured":"Mihatsch O, Neuneier R (2002) Risk-sensitive reinforcement learning. Mach Learn 49(2):267\u2013290","journal-title":"Mach Learn"},{"key":"66_CR54","first-page":"103","volume":"13","author":"A Moore","year":"1993","unstructured":"Moore A, Atkeson C (1993) Prioritized sweeping: reinforcement learning with less data and less real time. Mach Learn 13:103\u2013130","journal-title":"Mach Learn"},{"key":"66_CR55","first-page":"199","volume":"11","author":"DE Moriarty","year":"1999","unstructured":"Moriarty DE, Schultz AC, Grefenstette JJ (1999) Evolutionary algorithms for reinforcement learning. J Art Intell Res 11:199\u2013229","journal-title":"J Art Intell Res"},{"key":"66_CR56","unstructured":"Ng A, Jordan M (2000) PEGASUS: a policy search method for large MDPs and POMDPs. In: Proceedings of the sixteenth conference on uncertainty in artificial intelligence, pp 406\u2013415"},{"key":"66_CR57","unstructured":"Ng A.Y, Coates A, Diel M, Ganapathi V, Schulte J, Tse B, Berger E, Liang E (2004) Inverted autonomous helicopter flight via reinforcement learning. In: Proceedings of the international symposium on experimental robotics"},{"issue":"2","key":"66_CR58","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1177\/105971239700500201","volume":"5","author":"P Nordin","year":"1997","unstructured":"Nordin P, Banzhaf W (1997) An on-line method to evolve behavior and to control a miniature robot in real time with genetic programming. Adapt Behav 5(2):107","journal-title":"Adapt Behav"},{"issue":"4","key":"66_CR59","doi-asserted-by":"crossref","first-page":"687","DOI":"10.2514\/2.1999","volume":"41","author":"Y Ong","year":"2003","unstructured":"Ong Y, Nair P, Keane A (2003) Evolutionary optimization of computationally expensive problems via surrogate modeling. AIAA J 41(4):687\u2013696","journal-title":"AIAA J"},{"key":"66_CR60","doi-asserted-by":"crossref","unstructured":"Oyekan J, Lu B, Li B, Gu D, Hu H (2010) A behavior based control system for surveillance UAVs. In: Liu H, Gu D, Howlett RJJ, Liu Y (eds) Robot intelligence, advanced information and knowledge processing. Springer, Berlin, pp 209\u2013228","DOI":"10.1007\/978-1-84996-329-9_10"},{"key":"66_CR61","unstructured":"Poli R, Cagnoni S (1997) Genetic programming with user-driven selection: experiments on the evolution of algorithms for image enhancement. In: Proceedings of the second annual conference on genetic programming, pp 269\u2013277"},{"key":"66_CR62","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1007\/s003660050029","volume":"15","author":"P Ponterosso","year":"1999","unstructured":"Ponterosso P, Fox DSJ (1999) Heuristically seeded genetic algorithms applied to truss optimisation. Eng Comput 15:345\u2013355","journal-title":"Eng Comput"},{"key":"66_CR63","doi-asserted-by":"crossref","unstructured":"Poupart P, Vlassis N, Hoey J, Regan K (2006) An analytic solution to discrete Bayesian reinforcement learning. In: Proceedings of the twenty-third international conference on machine learning","DOI":"10.1145\/1143844.1143932"},{"issue":"6","key":"66_CR64","doi-asserted-by":"crossref","first-page":"999","DOI":"10.1007\/BF02703810","volume":"28","author":"D Pratihar","year":"2003","unstructured":"Pratihar D (2003) Evolutionary robotics: a review. Sadhana 28(6):999\u20131009","journal-title":"Sadhana"},{"key":"66_CR65","doi-asserted-by":"crossref","unstructured":"Priesterjahn S, Weimer A, Eberling M (2008) Real-time imitation-based adaptation of gaming behaviour in modern computer games. In: Proceedings of the genetic and evolutionary computation conference, pp 1431\u20131432","DOI":"10.1145\/1389095.1389374"},{"key":"66_CR66","doi-asserted-by":"crossref","unstructured":"Purwin O, D\u2019Andrea R (2009) Performing aggressive maneuvers using iterative learning control. In: ICRA-09: IEEE international conference on robotics and automation, 2009, pp 1731\u20131736","DOI":"10.1109\/ROBOT.2009.5152599"},{"issue":"5","key":"66_CR67","doi-asserted-by":"crossref","first-page":"490","DOI":"10.1109\/TEVC.2004.835247","volume":"8","author":"R Regis","year":"2004","unstructured":"Regis R, Shoemaker C (2004) Local function approximation in evolutionary algorithms for the optimization of costly functions. IEEE Trans Evolut Comput 8(5):490\u2013505","journal-title":"IEEE Trans Evolut Comput"},{"key":"66_CR68","doi-asserted-by":"crossref","unstructured":"Sastry K, Lima CF, Goldberg DE (2006) Evaluation relaxation using substructural information and linear estimation. In: Proceedings of the 8th annual conference on genetic and evolutionary computation, GECCO \u201906, pp 419\u2013426","DOI":"10.1145\/1143997.1144074"},{"key":"66_CR69","doi-asserted-by":"crossref","unstructured":"Schmidt M, Lipson H (2006) Actively probing and modeling users in interactive coevolution. In: Proceedings of the 8th conference on genetic and evolutionary computation, pp 385\u2013386","DOI":"10.1145\/1143997.1144068"},{"issue":"6","key":"66_CR70","doi-asserted-by":"crossref","first-page":"736","DOI":"10.1109\/TEVC.2008.919006","volume":"12","author":"M Schmidt","year":"2008","unstructured":"Schmidt M, Lipson H (2008) Coevolution of fitness predictors. IEEE Trans Evolut Comput 12(6):736\u2013749","journal-title":"IEEE Trans Evolut Comput"},{"issue":"1","key":"66_CR71","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1016\/S0967-0661(00)00087-3","volume":"9","author":"P Schroder","year":"2001","unstructured":"Schroder P, Green B, Grum N, Fleming P (2001) On-line evolution of robust control systems: an industrial active magnetic bearing application. Cont Eng Pract 9(1):37\u201349","journal-title":"Cont Eng Pract"},{"key":"66_CR72","unstructured":"Siebel NT, Sommer G (2007) Evolutionary reinforcement learning of artificial neural networks. Int J Hybrid Intell Syst 4(3):171\u2013183"},{"issue":"11","key":"66_CR73","first-page":"1065","volume":"11","author":"O Sigaud","year":"2007","unstructured":"Sigaud O, Wilson S (2007) Learning classifier systems: a survey. Soft Comput Fusion Found Method Appl 11(11):1065\u20131078","journal-title":"Soft Comput Fusion Found Method Appl"},{"key":"66_CR74","doi-asserted-by":"crossref","first-page":"188","DOI":"10.1007\/BFb0056862","volume":"5","author":"P Stagge","year":"1998","unstructured":"Stagge P (1998) Averaging efficiently in the presence of noise. Parallel Probl Solving Nat 5:188\u2013197","journal-title":"In: Parallel Problem Solving from Nature"},{"issue":"2","key":"66_CR75","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1162\/artl.2009.15.2.15202","volume":"15","author":"KO Stanley","year":"2009","unstructured":"Stanley KO, D\u2019Ambrosio DB, Gauci J (2009) A hypercube-based indirect encoding for evolving large-scale neural networks. Art Life 15(2):185\u2013212","journal-title":"Art Life"},{"issue":"2","key":"66_CR76","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1162\/106365602320169811","volume":"10","author":"K.O Stanley","year":"2002","unstructured":"Stanley KO, Miikkulainen R (2002) Evolving neural networks through augmenting topologies. Evolut Comput 10(2):99\u2013127","journal-title":"Evolut Comput"},{"key":"66_CR77","doi-asserted-by":"crossref","unstructured":"Steels L (1994) Emergent functionality in robotic agents through on-line evolution. In: artificial life IV: Proceedings of the fourth international workshop on the synthesis and simulation of living systems, pp 8\u201316","DOI":"10.7551\/mitpress\/1428.003.0004"},{"issue":"3","key":"66_CR78","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1177\/105971230501300301","volume":"13","author":"P Stone","year":"2005","unstructured":"Stone P, Sutton RS, Kuhlmann G (2005) Reinforcement learning in Robocup-soccer keepaway. Adapt Behav 13(3):165\u2013188","journal-title":"Adapt Behav"},{"key":"66_CR79","doi-asserted-by":"crossref","unstructured":"Strehl AL, Li L, Wiewiora E, Langford J, Littman ML (2006) PAC model-free reinforcement learning. In: ICML-06: Proceedings of the 23rd international conference on machine learning, pp 881\u2013888","DOI":"10.1145\/1143844.1143955"},{"key":"66_CR80","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3:9\u201344","journal-title":"Mach Learn"},{"key":"66_CR81","doi-asserted-by":"crossref","unstructured":"Sutton RS (1990) Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Proceedings of the seventh international conference on machine learning, pp 216\u2013224","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"66_CR82","doi-asserted-by":"crossref","DOI":"10.1109\/TNN.1998.712192","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"66_CR83","doi-asserted-by":"crossref","unstructured":"Tan C, Ang J, Tan K, Tay A (2008) Online adaptive controller for simulated car racing. In: Congress on evolutionary computation (CEC), pp 2239\u20132245","DOI":"10.1109\/CEC.2008.4631096"},{"key":"66_CR84","unstructured":"Tang J, Singh A, Goehausen N, Abbeel P (2010) Parameterized maneuver learning for autonomous helicopter flight. In: International conference on robotics and automation (ICRA)"},{"key":"66_CR85","first-page":"2133","volume":"10","author":"B Tanner","year":"2009","unstructured":"Tanner B, White A (2009) RL-Glue : Language-independent software for reinforcement-learning experiments. J Mach Learn Res 10:2133\u20132136","journal-title":"J Mach Learn Res"},{"issue":"3","key":"66_CR86","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and TD-gammon. Commun ACM 38(3):58\u201368. doi: 10.1145\/203330.203343","journal-title":"Commun ACM"},{"key":"66_CR87","unstructured":"Watkins C (1989) Learning from delayed rewards. Ph.D. thesis, Cambridge University"},{"issue":"1","key":"66_CR88","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1007\/s10994-005-0460-9","volume":"59","author":"S Whiteson","year":"2005","unstructured":"Whiteson S, Kohl N, Miikkulainen R, Stone P (2005) Evolving keepaway soccer players through task decomposition. Mach Learn 59(1):5\u201330","journal-title":"Mach Learn"},{"key":"66_CR89","first-page":"877","volume":"7","author":"S Whiteson","year":"2006","unstructured":"Whiteson S, Stone P (2006) Evolutionary function approximation for reinforcement learning. J Mach Learn Res 7:877\u2013917","journal-title":"J Mach Learn Res"},{"key":"66_CR90","doi-asserted-by":"crossref","unstructured":"Whiteson S, Stone P (2006) On-line evolutionary computation for reinforcement learning in stochastic domains. In: GECCO 2006: Proceedings of the genetic and evolutionary computation conference, pp 1577\u20131584","DOI":"10.1145\/1143997.1144252"},{"key":"66_CR91","unstructured":"Whiteson S, Tanner B, Taylor ME, Stone P (2009) Generalized domains for empirical evaluations in reinforcement learning. In: ICML 2009: Proceedings of the twenty-sixth international conference on machine learning: workshop on evaluation methods for machine learning"},{"key":"66_CR92","doi-asserted-by":"crossref","unstructured":"Whiteson S, Tanner B, Taylor ME, Stone P (2011) Protecting against evaluation overfitting in empirical reinforcement learning. In: ADPRL 2011: Proceedings of the IEEE symposium on adaptive dynamic programming and reinforcement learning (to appear)","DOI":"10.1109\/ADPRL.2011.5967363"},{"issue":"2","key":"66_CR93","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1609\/aimag.v31i2.2227","volume":"31","author":"S Whiteson","year":"2010","unstructured":"Whiteson S, Tanner B, White A (2010) The reinforcement learning competitions. AI Mag 31(2):81\u201394","journal-title":"AI Mag"},{"issue":"1","key":"66_CR94","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10458-009-9100-2","volume":"21","author":"S Whiteson","year":"2010","unstructured":"Whiteson S, Taylor ME, Stone P (2010) Critical factors in the empirical performance of temporal difference and evolutionary methods for reinforcement learning. Auton Agents Multi-Agent Syst 21(1):1\u201327","journal-title":"Auton Agents Multi-Agent Syst"},{"key":"66_CR95","doi-asserted-by":"crossref","unstructured":"Wilson A, Fern A, Ray S, Tadepalli P (2007) Multi-task reinforcement learning: a hierarchical Bayesian approach. In: Proceedings of the 24th international conference on machine learning, pp 1015\u20131022","DOI":"10.1145\/1273496.1273624"},{"issue":"2","key":"66_CR96","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1162\/evco.1995.3.2.149","volume":"3","author":"S Wilson","year":"1995","unstructured":"Wilson S (1995) Classifier fitness based on accuracy. Evolut Comput 3(2):149\u2013175","journal-title":"Evolut Comput"},{"key":"66_CR97","unstructured":"Wilson S (2001) Function approximation with a classifier system. In: GECCO-2001: Proceedings of the genetic and evolutionary computation conference, p 974"},{"key":"66_CR98","unstructured":"Yang D, Flockton S (1995) Evolutionary algorithms with a coarse-to-fine function smoothing. In: IEEE international conference on evolutionary computation 2: 657\u2013662"},{"issue":"9","key":"66_CR99","doi-asserted-by":"crossref","first-page":"1423","DOI":"10.1109\/5.784219","volume":"87","author":"X Yao","year":"1999","unstructured":"Yao X (1999) Evolving artificial neural networks. Proc IEEE 87(9):1423\u20131447","journal-title":"Proc IEEE"},{"key":"66_CR100","unstructured":"Zufferey J-C, Floreano D, Van Leeuwen M, Merenda T (2002) Evolving vision-based flying robots. In: Lee B, Wallraven P (eds) Proceedings of the 2nd international workshop on biologically motivated computer vision (BMCV). Springer, Berlin, pp 592\u2013600"}],"container-title":["Evolutionary Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/www.springerlink.com\/index\/pdf\/10.1007\/s12065-011-0066-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T07:17:16Z","timestamp":1713079036000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12065-011-0066-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,10,30]]},"references-count":100,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2011,12]]}},"alternative-id":["66"],"URL":"https:\/\/doi.org\/10.1007\/s12065-011-0066-z","relation":{},"ISSN":["1864-5909","1864-5917"],"issn-type":[{"value":"1864-5909","type":"print"},{"value":"1864-5917","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,10,30]]}}}