{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:34Z","timestamp":1725684814653},"publisher-location":"Berlin, Heidelberg","reference-count":32,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_10","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"66-77","source":"Crossref","is-referenced-by-count":5,"title":["Feature Reinforcement Learning in Practice"],"prefix":"10.1007","author":[{"given":"Phuong","family":"Nguyen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Sunehag","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marcus","family":"Hutter","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"10_CR1","doi-asserted-by":"publisher","first-page":"716","DOI":"10.1109\/TAC.1974.1100705","volume":"19","author":"H. Akaike","year":"1974","unstructured":"Akaike, H.: A new look at the statistical model identification. IEEE Transactions on Automatic Control\u00a019, 716\u2013723 (1974)","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10_CR2","volume-title":"Neuro-Dynamic Programming","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Anthena Scientific, Belmont (1996)"},{"key":"10_CR3","first-page":"213","volume":"3","author":"R.I. Brafman","year":"2002","unstructured":"Brafman, R.I., Tennenholz, M.: R-max -a general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learing Research\u00a03, 213\u2013231 (2002)","journal-title":"Journal of Machine Learing Research"},{"key":"10_CR4","unstructured":"Chrisman, L.: Reinforcement learning with perceptual aliasing: The perceptual distinctions approach. In: AAAI, pp. 183\u2013188 (1992)"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Cover, T.M., Thomas, J.A.: Elements of Information Theory. John Willey and Sons (1991)","DOI":"10.1002\/0471200611"},{"issue":"5","key":"10_CR6","doi-asserted-by":"publisher","first-page":"2441","DOI":"10.1109\/TIT.2010.2043762","volume":"56","author":"V. Farias","year":"2010","unstructured":"Farias, V., Moallemi, C., Van Roy, B., Weissman, T.: Universal reinforcement learning. IEEE Transactions on Information Theory\u00a056(5), 2441\u20132454 (2010)","journal-title":"IEEE Transactions on Information Theory"},{"key":"10_CR7","first-page":"156","volume-title":"Computing Science and Statistics: the 23rd Symposium on the Interface","author":"C.J. Geyer","year":"1991","unstructured":"Geyer, C.J.: Markov chain Monte Calro maximum likelihood. In: Computing Science and Statistics: the 23rd Symposium on the Interface, pp. 156\u2013163. Interface Foundation, Fairfax (1991)"},{"key":"10_CR8","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/S0004-3702(02)00376-4","volume":"147","author":"R. Givan","year":"2003","unstructured":"Givan, R., Dean, T., Greig, M.: Equivalence notions and model minimization in Markov decision process. Artificial Intelligence\u00a0147, 163\u2013223 (2003)","journal-title":"Artificial Intelligence"},{"issue":"6","key":"10_CR9","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/34.295910","volume":"16","author":"V. Granville","year":"1994","unstructured":"Granville, V., K\u0159iv\u00e1nek, M., Rasson, J.P.: Simulated annealing: A proof of convergence. IEEE Transactions on Pattern Analysis and Machine Intelligence\u00a016(6), 652\u2013656 (1994)","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Gr\u00fcnwald, P.D.: The Minimum Description Length Principle. The MIT Press (2007)","DOI":"10.7551\/mitpress\/4643.001.0001"},{"issue":"4","key":"10_CR11","doi-asserted-by":"publisher","first-page":"1604","DOI":"10.1143\/JPSJ.65.1604","volume":"65","author":"K. Hukushima","year":"1996","unstructured":"Hukushima, K., Nemoto, K.: Exchange Monte Carlo method and application to spin glass simulations. Journal of the Physical Socieity of Japan\u00a065(4), 1604\u20131608 (1996)","journal-title":"Journal of the Physical Socieity of Japan"},{"key":"10_CR12","doi-asserted-by":"crossref","DOI":"10.1007\/b138233","volume-title":"Universal Articial Intelligence: Sequential Decisions based on Algorithmic Probability","author":"M. Hutter","year":"2005","unstructured":"Hutter, M.: Universal Articial Intelligence: Sequential Decisions based on Algorithmic Probability. Springer, Berlin (2005)"},{"key":"10_CR13","doi-asserted-by":"crossref","unstructured":"Hutter, M.: Feature reinforcement learning: Part I. Unstructured MDPs. Journal of General Artificial Intelligence (2009)","DOI":"10.2478\/v10229-011-0002-8"},{"key":"10_CR14","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L.P. Kaelbling","year":"1998","unstructured":"Kaelbling, L.P., Littman, M.L., Cassandra, A.R.: Planning and acting in paritally observable stochastic domains. Artifical Intelligence\u00a0101, 99\u2013134 (1998)","journal-title":"Artifical Intelligence"},{"key":"10_CR15","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/11871842_29","volume-title":"Machine Learning: ECML 2006","author":"L. Kocsis","year":"2006","unstructured":"Kocsis, L., Szepesv\u00e1ri, C.: Bandit Based Monte-Carlo Planning. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol.\u00a04212, pp. 282\u2013293. Springer, Heidelberg (2006)"},{"key":"10_CR16","unstructured":"Li, L., Walsh, T.J., Littmans, M.L.: Towards a unified theory of state abstraction for MDPs. In: Proceedings of the 9th International Symposium on Artificial Intelligence and Mathematics (2006)"},{"key":"10_CR17","volume-title":"Monte Carlo Strategies in Scientific Computing","author":"J.S. Liu","year":"2001","unstructured":"Liu, J.S.: Monte Carlo Strategies in Scientific Computing. Springer, Heidelberg (2001)"},{"key":"10_CR18","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/S0004-3702(02)00378-8","volume":"147","author":"O. Madani","year":"2003","unstructured":"Madani, O., Handks, S., Condon: On the undecidability of probabilistic planning and related stochastic optimization problems. Artifical Intelligence\u00a0147, 5\u201334 (2003)","journal-title":"Artifical Intelligence"},{"key":"10_CR19","unstructured":"Mahmud, M.M.H.: Constructing states for reinforcement learning. In: F\u00fcrnkranz, J., Joachims, T. (eds.) Proceedings of the 27th International Conference on Machine Learning (ICML 2010), Haifa, Israel, pp. 727\u2013734 (June 2010), \n                    \n                      http:\/\/www.icml2010.org\/papers\/593.pdf"},{"key":"10_CR20","unstructured":"McCallum, A.K.: Reinforcement Learning with Selective Perception and Hidden State. Ph.D. thesis, Department of Computer Science, University of Rochester (1996)"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Nguyen, P., Sunehag, P., Hutter, M.: Feature refinrocement learning in practice. Tech. rep., Australian National University (2011)","DOI":"10.1007\/978-3-642-29946-9_10"},{"key":"10_CR22","unstructured":"Poland, J., Hutter, M.: Universal learning of repeated matrix games. In: Proc. 15th Annual Machine Learning Conf. of Belgium and The Netherlands (Benelearn 2006), pp. 7\u201314. Ghent (2006), \n                    \n                      http:\/\/arxiv.org\/abs\/cs.LG\/0508073"},{"issue":"5","key":"10_CR23","doi-asserted-by":"publisher","first-page":"656","DOI":"10.1109\/TIT.1983.1056741","volume":"29","author":"J. Rissanen","year":"1983","unstructured":"Rissanen, J.: A universal data compression system. IEEE Transactions on Information Theory\u00a029(5), 656\u2013663 (1983)","journal-title":"IEEE Transactions on Information Theory"},{"key":"10_CR24","volume-title":"Stochastic Optimization","author":"J. Schneider","year":"2006","unstructured":"Schneider, J., Kirkpatrick, S.: Stochastic Optimization, 1st edn. Springer, Heidelberg (2006)","edition":"1"},{"key":"10_CR25","unstructured":"Singh, S.P., James, M.R., Rudary, M.R.: Predictive state representations: A new theory for modeling dynamical systems. In: Proceedings of the 20th Conference in Uncertainty in Artificial Intelligence, Banff, Canada, pp. 512\u2013518 (2004)"},{"key":"10_CR26","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1057\/palgrave.jors.2602068","volume":"57","author":"B. Suman","year":"2006","unstructured":"Suman, B., Kumar, P.: A survey of simulated annealing as a tool for single and multiobjecctive optimization. Journal of the Operational Research Society\u00a057, 1143\u20131160 (2006)","journal-title":"Journal of the Operational Research Society"},{"key":"10_CR27","series-title":"LNCS(LNAI)","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1007\/978-3-642-16108-7_29","volume-title":"Algorithmic Learning Theory","author":"P. Sunehag","year":"2010","unstructured":"Sunehag, P., Hutter, M.: Consistency of Feature Markov Processes. In: Hutter, M., Stephan, F., Vovk, V., Zeugmann, T. (eds.) ALT 2010. LNCS(LNAI), vol.\u00a06331, pp. 360\u2013374. Springer, Heidelberg (2010)"},{"key":"10_CR28","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning. The MIT Press (1998)"},{"issue":"1","key":"10_CR29","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1613\/jair.3125","volume":"40","author":"J. Veness","year":"2011","unstructured":"Veness, J., Ng, K.S., Hutter, M., Uther, W., Silver, D.: A Monte-Carlo AIXI approximation. Journal of Artifiicial Intelligence Research\u00a040(1), 95\u2013142 (2011)","journal-title":"Journal of Artifiicial Intelligence Research"},{"issue":"7","key":"10_CR30","doi-asserted-by":"publisher","first-page":"1013","DOI":"10.1109\/TPAMI.2005.147","volume":"27","author":"E. Vidal","year":"2005","unstructured":"Vidal, E., Thollard, F., Higuera, C.D.L., Casacuberta, F., Carrasco, R.C.: Probabilitic finite-state machines. IEEE Transactions on Pattern Analysis and Machine Intelligence\u00a027(7), 1013\u20131025 (2005)","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10_CR31","volume-title":"Statistical and Inductive Inference by Minimum Message Length","author":"C.S. Wallace","year":"2005","unstructured":"Wallace, C.S.: Statistical and Inductive Inference by Minimum Message Length. Springer, Berlin (2005)"},{"key":"10_CR32","doi-asserted-by":"publisher","first-page":"653","DOI":"10.1109\/18.382012","volume":"41","author":"F.M.J. Wilems","year":"1995","unstructured":"Wilems, F.M.J., Shtarkov, Y.M., Tjalkens, T.J.: The context tree weighting method: Basic properties. IEEE Transactions on Information Theory\u00a041, 653\u2013664 (1995)","journal-title":"IEEE Transactions on Information Theory"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_10.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T11:22:20Z","timestamp":1620127340000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}