{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T20:50:52Z","timestamp":1764276652550,"version":"3.40.4"},"publisher-location":"Berlin, Heidelberg","reference-count":30,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642334917"},{"type":"electronic","value":"9783642334924"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-33492-4_6","type":"book-chapter","created":{"date-parts":[[2012,10,22]],"date-time":"2012-10-22T09:06:10Z","timestamp":1350896770000},"page":"37-51","source":"Crossref","is-referenced-by-count":14,"title":["Policy Search in a Space of Simple Closed-form Formulas: Towards Interpretability of Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Francis","family":"Maes","sequence":"first","affiliation":[]},{"given":"Raphael","family":"Fonteneau","sequence":"additional","affiliation":[]},{"given":"Louis","family":"Wehenkel","sequence":"additional","affiliation":[]},{"given":"Damien","family":"Ernst","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"6_CR1","doi-asserted-by":"publisher","first-page":"223","DOI":"10.3934\/mbe.2004.1.223","volume":"1","author":"B. Adams","year":"2004","unstructured":"Adams, B., Banks, H., Kwon, H.D., Tran, H.: Dynamic multidrug therapies for HIV: Optimal and STI approaches. Mathematical Biosciences and Engineering\u00a01, 223\u2013241 (2004)","journal-title":"Mathematical Biosciences and Engineering"},{"key":"6_CR2","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1007\/978-3-540-75225-7_15","volume-title":"Algorithmic Learning Theory","author":"J.-Y. Audibert","year":"2007","unstructured":"Audibert, J.-Y., Munos, R., Szepesv\u00e1ri, C.: Tuning Bandit Algorithms in Stochastic Environments. In: Hutter, M., Servedio, R.A., Takimoto, E. (eds.) ALT 2007. LNCS (LNAI), vol.\u00a04754, pp. 150\u2013165. Springer, Heidelberg (2007)"},{"issue":"2","key":"6_CR3","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Machine Learning\u00a047(2), 235\u2013256 (2002)","journal-title":"Machine Learning"},{"issue":"6","key":"6_CR4","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1016\/j.pneurobio.2003.12.001","volume":"71","author":"I. Bar-Gad","year":"2003","unstructured":"Bar-Gad, I., Morris, G., Bergman, H.: Information processing, dimensionality reduction and reinforcement learning in the basal ganglia. Progress in Neurobiology\u00a071(6), 439\u2013473 (2003)","journal-title":"Progress in Neurobiology"},{"issue":"4","key":"6_CR5","doi-asserted-by":"publisher","first-page":"1034","DOI":"10.1109\/18.86996","volume":"37","author":"A.R. Barron","year":"1991","unstructured":"Barron, A.R., Cover, T.M.: Minimum complexity density estimation. IEEE Transactions on Information Theory\u00a037(4), 1034\u20131054 (1991)","journal-title":"IEEE Transactions on Information Theory"},{"key":"6_CR6","unstructured":"Busoniu, L., Babuska, R., De\u00a0Schutter, B., Ernst, D.: Reinforcement Learning and Dynamic Programming using Function Approximators. Taylor & Francis, CRC Press (2010)"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Castelletti, A., Galelli, S., Restelli, M., Soncini-Sessa, R.: Tree-based variable selection for dimensionality reduction of large-scale control systems. In: Adaptive Dynamic Programming and Reinforcement Learning (ADPRL), pp. 62\u201369. IEEE (2011)","DOI":"10.1109\/ADPRL.2011.5967387"},{"key":"6_CR8","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. Journal of Machine Learning Research\u00a06, 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"6_CR9","unstructured":"Fonteneau, R., Wehenkel, L., Ernst, D.: Variable selection for dynamic treatment regimes: a reinforcement learning approach. In: European Workshop on Reinforcement Learning, EWRL (2008)"},{"key":"6_CR10","unstructured":"Gearhart, C.: Genetic programming as policy search in markov decision processes. In: Genetic Algorithms and Genetic Programming at Stanford, pp. 61\u201367 (2003)"},{"key":"6_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1007\/978-3-540-78671-9_19","volume-title":"Genetic Programming","author":"S. Girgin","year":"2008","unstructured":"Girgin, S., Preux, P.: Feature Discovery in Reinforcement Learning Using Genetic Programming. In: O\u2019Neill, M., Vanneschi, L., Gustafson, S., Esparcia Alc\u00e1zar, A.I., De Falco, I., Della Cioppa, A., Tarantino, E. (eds.) EuroGP 2008. LNCS, vol.\u00a04971, pp. 218\u2013229. Springer, Heidelberg (2008)"},{"key":"6_CR12","unstructured":"Goldberg, D.: Genetic Algorithms in Search, Optimization, and Machine Learning. Addison-wesley (1989)"},{"key":"6_CR13","unstructured":"Guez, A., Vincent, R., Avoli, M., Pineau, J.: Adaptive treatment of epilepsy via batch-mode reinforcement learning. In: Innovative Applications of Artificial Intelligence (IAAI), pp. 1671\u20131678 (2008)"},{"key":"6_CR14","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1007\/978-3-540-73599-1_17","volume-title":"Artificial Intelligence in Medicine","author":"L. Gunter","year":"2007","unstructured":"Gunter, L., Zhu, J., Murphy, S.: Variable Selection for Optimal Decision Making. In: Bellazzi, R., Abu-Hanna, A., Hunter, J. (eds.) AIME 2007. LNCS (LNAI), vol.\u00a04594, pp. 149\u2013154. Springer, Heidelberg (2007)"},{"key":"6_CR15","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/978-3-540-89722-4_12","volume-title":"Recent Advances in Reinforcement Learning","author":"J.-F. Hren","year":"2008","unstructured":"Hren, J.-F., Munos, R.: Optimistic Planning of Deterministic Systems. In: Girgin, S., Loth, M., Munos, R., Preux, P., Ryabko, D. (eds.) EWRL 2008. LNCS (LNAI), vol.\u00a05323, pp. 151\u2013164. Springer, Heidelberg (2008)"},{"key":"6_CR16","doi-asserted-by":"crossref","DOI":"10.1007\/b138233","volume-title":"Universal Artificial Intelligence: Sequential Decisions based on Algorithmic Probability","author":"M. Hutter","year":"2005","unstructured":"Hutter, M.: Universal Artificial Intelligence: Sequential Decisions based on Algorithmic Probability. Springer, Berlin (2005)"},{"key":"6_CR17","unstructured":"Ingersoll, J.: Theory of Financial Decision Making. Rowman and Littlefield Publishers, Inc. (1987)"},{"issue":"1","key":"6_CR18","first-page":"1","volume":"1","author":"A.N. Kolmogorov","year":"1965","unstructured":"Kolmogorov, A.N.: Three approaches to the quantitative definition of information. Problems of Information Transmission\u00a01(1), 1\u20137 (1965)","journal-title":"Problems of Information Transmission"},{"key":"6_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/978-3-642-29946-9_5","volume-title":"Recent Advances in Reinforcement Learning","author":"F. Maes","year":"2012","unstructured":"Maes, F., Wehenkel, L., Ernst, D.: Automatic Discovery of Ranking Formulas for Playing with Multi-armed Bandits. In: Sanner, S., Hutter, M. (eds.) EWRL 2011. LNCS, vol.\u00a07188, pp. 5\u201317. Springer, Heidelberg (2012)"},{"key":"6_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1007\/978-3-642-29946-9_20","volume-title":"Recent Advances in Reinforcement Learning","author":"F. Maes","year":"2012","unstructured":"Maes, F., Wehenkel, L., Ernst, D.: Optimized Look-ahead Tree Search Policies. In: Sanner, S., Hutter, M. (eds.) EWRL 2011. LNCS, vol.\u00a07188, pp. 189\u2013200. Springer, Heidelberg (2012)"},{"issue":"3","key":"6_CR21","first-page":"199","volume":"21","author":"A. Moore","year":"1995","unstructured":"Moore, A., Atkeson, C.: The parti-game algorithm for variable resolution reinforcement learning in multidimensional state-spaces. Machine Learning\u00a021(3), 199\u2013233 (1995)","journal-title":"Machine Learning"},{"issue":"2","key":"6_CR22","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1111\/1467-9868.00389","volume":"65","author":"S. Murphy","year":"2003","unstructured":"Murphy, S.: Optimal dynamic treatment regimes. Journal of the Royal Statistical Society, Series B\u00a065(2), 331\u2013366 (2003)","journal-title":"Journal of the Royal Statistical Society, Series B"},{"key":"6_CR23","unstructured":"Randl\u00f8v, J., Alstr\u00f8m, P.: Learning to drive a bicycle using reinforcement learning and shaping. In: Proceedings of the Fifteenth International Conference on Machine Learning (ICML), pp. 463\u2013471. Citeseer (1998)"},{"key":"6_CR24","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine Learning: ECML 2005","author":"M. Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural Fitted Q Iteration - First Experiences with a Data Efficient Neural Reinforcement Learning Method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 317\u2013328. Springer, Heidelberg (2005)"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Rubinstein, R., Kroese, D.: The Cross-Entropy Method. A Unified Approach to Combinatorial Optimization, Monte-Carlo Simulation, and Machine Learning. Information Science and Statistics. Springer (2004)","DOI":"10.1007\/978-1-4757-4321-0_4"},{"key":"6_CR26","unstructured":"R\u00fcping, S.: Learning Interpretable Models. Ph.D. thesis (2006)"},{"issue":"2","key":"6_CR27","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1162\/106365602320169811","volume":"10","author":"K. Stanley","year":"2002","unstructured":"Stanley, K., Miikkulainen, R.: Evolving neural networks through augmenting topologies. Evolutionary Computation\u00a010(2), 99\u2013127 (2002)","journal-title":"Evolutionary Computation"},{"key":"6_CR28","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-5451-6","volume-title":"Automatic Learning Techniques in Power Systems","author":"L. Wehenkel","year":"1998","unstructured":"Wehenkel, L.: Automatic Learning Techniques in Power Systems. Kluwer Academic Publishers, Boston (1998)"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Yoshimoto, J., Ishii, S., Sato, M.: Application of reinforcement learning to balancing of acrobot. In: Systems, Man, and Cybernetics Conference Proceedings, vol.\u00a05, pp. 516\u2013521. IEEE (1999)","DOI":"10.1109\/ICSMC.1999.815605"},{"key":"6_CR30","doi-asserted-by":"publisher","first-page":"3294","DOI":"10.1002\/sim.3720","volume":"28","author":"Y. Zhao","year":"2009","unstructured":"Zhao, Y., Kosorok, M., Zeng, D.: Reinforcement learning design for cancer clinical trials. Statistics in Medicine\u00a028, 3294\u20133315 (2009)","journal-title":"Statistics in Medicine"}],"container-title":["Lecture Notes in Computer Science","Discovery Science"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-33492-4_6.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,17]],"date-time":"2025-04-17T02:09:44Z","timestamp":1744855784000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-33492-4_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642334917","9783642334924"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-33492-4_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}