{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T20:41:02Z","timestamp":1743108062910,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319687582"},{"type":"electronic","value":"9783319687599"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-68759-9_39","type":"book-chapter","created":{"date-parts":[[2017,10,13]],"date-time":"2017-10-13T02:33:42Z","timestamp":1507862022000},"page":"473-485","source":"Crossref","is-referenced-by-count":1,"title":["Effective Policy Gradient Search for\u00a0Reinforcement Learning Through NEAT Based Feature Extraction"],"prefix":"10.1007","author":[{"given":"Yiming","family":"Peng","sequence":"first","affiliation":[]},{"given":"Gang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Mengjie","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Mei","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,10,14]]},"reference":[{"key":"39_CR1","unstructured":"Balduzzi, D., Frean, M., Leary, L., Lewis, J.P.: The shattered gradients problem: if resnets are the answer, then what is the question? arXiv.org (2017)"},{"issue":"8","key":"39_CR2","doi-asserted-by":"crossref","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio, Y., Courville, A., Vincent, P.: Representation learning: a review and new perspectives. IEEE Trans. Pattern Anal. Mach. Intell. 35(8), 1798\u20131828 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"11","key":"39_CR3","doi-asserted-by":"crossref","first-page":"2471","DOI":"10.1016\/j.automatica.2009.07.008","volume":"45","author":"S Bhatnagar","year":"2009","unstructured":"Bhatnagar, S., Sutton, R.S., Ghavamzadeh, M., Lee, M.: Natural actor-critic algorithms. Automatica 45(11), 2471\u20132482 (2009)","journal-title":"Automatica"},{"issue":"6","key":"39_CR4","doi-asserted-by":"crossref","first-page":"953","DOI":"10.1109\/TEVC.2016.2560139","volume":"20","author":"G Chen","year":"2016","unstructured":"Chen, G., Douch, C.I.J., Zhang, M.: Accuracy-based learning classifier systems for multistep reinforcement learning: a fuzzy logic approach to handling continuous inputs and learning continuous actions. IEEE Trans. Evol. Comput. 20(6), 953\u2013971 (2016)","journal-title":"IEEE Trans. Evol. Comput."},{"issue":"1\u20132","key":"39_CR5","first-page":"1","volume":"2","author":"MP Deisenroth","year":"2013","unstructured":"Deisenroth, M.P., Neumann, G., Peters, J.: A survey on policy search for robotics. Found. Trends Robot. 2(1\u20132), 1\u2013142 (2013)","journal-title":"Found. Trends Robot."},{"key":"39_CR6","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1007\/978-3-642-15880-3_26","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"D Castro","year":"2010","unstructured":"Castro, D., Mannor, S.: Adaptive bases for reinforcement learning. In: Balc\u00e1zar, J.L., Bonchi, F., Gionis, A., Sebag, M. (eds.) ECML PKDD 2010. LNCS (LNAI), vol. 6321, pp. 312\u2013327. Springer, Heidelberg (2010). doi: 10.1007\/978-3-642-15880-3_26"},{"issue":"6","key":"39_CR7","doi-asserted-by":"crossref","first-page":"1291","DOI":"10.1109\/TSMCC.2012.2218595","volume":"42","author":"I Grondman","year":"2012","unstructured":"Grondman, I., Busoniu, L., Lopes, G.A.D., Babu\u0161ka, R.: A survey of actor-critic reinforcement learning: standard and natural policy gradients. IEEE Trans. Syst. Man Cybern. Part C Appl. Rev. 42(6), 1291\u20131307 (2012)","journal-title":"IEEE Trans. Syst. Man Cybern. Part C Appl. Rev."},{"key":"39_CR8","unstructured":"Gu, S., Lillicrap, T.P., Sutskever, I., Levine, S.: Continuous deep q-learning with model-based acceleration. In: ICML, pp. 2829\u20132838 (2016)"},{"issue":"6","key":"39_CR9","doi-asserted-by":"crossref","first-page":"e1002063","DOI":"10.1371\/journal.pcbi.1002063","volume":"7","author":"AM Hermundstad","year":"2011","unstructured":"Hermundstad, A.M., Brown, K.S., Bassett, D.S., Carlson, J.M.: Learning, memory, and the role of neural network architecture. PLoS Comput. Biol. 7(6), e1002063 (2011)","journal-title":"PLoS Comput. Biol."},{"issue":"3","key":"39_CR10","doi-asserted-by":"crossref","first-page":"318","DOI":"10.1109\/TEVC.2005.850290","volume":"9","author":"S Kamio","year":"2005","unstructured":"Kamio, S., Iba, H.: Adaptation technique for integrating genetic programming and reinforcement learning for real robots. IEEE Trans. Evol. Comput. 9(3), 318\u2013333 (2005)","journal-title":"IEEE Trans. Evol. Comput."},{"key":"39_CR11","doi-asserted-by":"crossref","unstructured":"Konidaris, G., Osentoski, S., Thomas, P.: Value function approximation in reinforcement learning using the fourier basis. In: 2011 AAAI, pp. 380\u2013385 (2011)","DOI":"10.1609\/aaai.v25i1.7903"},{"issue":"1","key":"39_CR12","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1007\/s12065-007-0003-3","volume":"1","author":"PL Lanzi","year":"2008","unstructured":"Lanzi, P.L.: Learning classifier systems: then and now. Evol. Intell. 1(1), 63\u201382 (2008)","journal-title":"Evol. Intell."},{"key":"39_CR13","first-page":"1","volume":"2014","author":"S Loscalzo","year":"2014","unstructured":"Loscalzo, S., Wright, R., Yu, L.: Predictive feature selection for genetic policy search. AAMAS 2014, 1\u201333 (2014)","journal-title":"AAMAS"},{"issue":"1","key":"39_CR14","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1007\/s10479-005-5732-z","volume":"134","author":"I Menache","year":"2005","unstructured":"Menache, I., Mannor, S., Shimkin, N.: Basis function adaptation in temporal difference reinforcement learning. Ann. Oper. Res. 134(1), 215\u2013238 (2005)","journal-title":"Ann. Oper. Res."},{"key":"39_CR15","doi-asserted-by":"crossref","unstructured":"Parr, R., Painter-Wakefield, C., Li, L.: Analyzing feature generation for value-function approximation. In: ICML, pp. 737\u2013744 (2007)","DOI":"10.1145\/1273496.1273589"},{"key":"39_CR16","doi-asserted-by":"crossref","unstructured":"Peng, Y., Chen, G., Zhang, M., Pang, S.: A sandpile model for reliable actor-critic reinforcement learning. In: IJCNN, pp. 4014\u20134021. IEEE (2017)","DOI":"10.1109\/IJCNN.2017.7966362"},{"key":"39_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1007\/978-3-319-46687-3_68","volume-title":"Neural Information Processing","author":"Y Peng","year":"2016","unstructured":"Peng, Y., Chen, G., Zhang, M., Pang, S.: Generalized compatible function approximation for policy gradient search. In: Hirose, A., Ozawa, S., Doya, K., Ikeda, K., Lee, M., Liu, D. (eds.) ICONIP 2016. LNCS, vol. 9947, pp. 615\u2013622. Springer, Cham (2016). doi: 10.1007\/978-3-319-46687-3_68"},{"issue":"1","key":"39_CR18","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1109\/TCIAIG.2015.2390615","volume":"8","author":"J Schrum","year":"2016","unstructured":"Schrum, J., Miikkulainen, R.: Discovering multimodal behavior in ms. pac-man through evolution of modular neural networks. IEEE Trans. Comput. Intell. AI Games 8(1), 67\u201381 (2016)","journal-title":"IEEE Trans. Comput. Intell. AI Games"},{"issue":"2","key":"39_CR19","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1162\/106365602320169811","volume":"10","author":"KO Stanley","year":"2002","unstructured":"Stanley, K.O., Miikkulainen, R.: Evolving neural network through augmenting topologies. Evol. Comput. 10(2), 99\u2013127 (2002)","journal-title":"Evol. Comput."},{"key":"39_CR20","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction, vol. 1. MIT press, Cambridge (1998)"},{"key":"39_CR21","unstructured":"Sutton, R.S., Mcallester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: NIPS, pp. 1057\u20131063 (1999)"},{"issue":"5","key":"39_CR22","first-page":"877","volume":"7","author":"S Whiteson","year":"2006","unstructured":"Whiteson, S., Stone, P.: Evolutionary function approximation for reinforcement learning. J. Mach. Learn. Res. 7(5), 877\u2013917 (2006)","journal-title":"J. Mach. Learn. Res."},{"key":"39_CR23","doi-asserted-by":"crossref","unstructured":"Whiteson, S., Stone, P., Stanley, K.O., Miikkulainen, R., Kohl, N.: Automatic feature selection in neuroevolution. In: 2005 GECCO, pp. 1225\u20131232 (2005)","DOI":"10.1145\/1068009.1068210"}],"container-title":["Lecture Notes in Computer Science","Simulated Evolution and Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-68759-9_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,27]],"date-time":"2023-08-27T03:15:53Z","timestamp":1693106153000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-68759-9_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319687582","9783319687599"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-68759-9_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}