{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T09:46:06Z","timestamp":1743155166054,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":25,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_6","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"18-29","source":"Crossref","is-referenced-by-count":1,"title":["Goal-Directed Online Learning of Predictive Models"],"prefix":"10.1007","author":[{"given":"Sylvie C. W.","family":"Ong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuri","family":"Grinberg","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joelle","family":"Pineau","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"6_CR1","unstructured":"Aberdeen, D., Buffet, O., Thomas, O.: Policy-gradients for PSRs and POMDPs. In: AISTATS (2007)"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Boots, B., Gordon, G.J.: An online spectral learning algorithm for partially observable nonlinear dynamical systems. In: Proceedings AAAI (2011)","DOI":"10.1609\/aaai.v25i1.7924"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Boots, B., Siddiqi, S., Gordon, G.: Closing the learning-planning loop with predictive state representations. In: Proceedings of Robotics: Science and Systems (2010)","DOI":"10.15607\/RSS.2010.VI.036"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Bowling, M., McCracken, P., James, M., Neufeld, J., Wilkinson, D.: Learning predictive state representations using non-blind policies. In: Proceedings ICML (2006)","DOI":"10.1145\/1143844.1143861"},{"key":"6_CR5","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/j.laa.2005.07.021","volume":"415","author":"M. Brand","year":"2006","unstructured":"Brand, M.: Fast low-rank modifications of the thin singular value decomposition. Linear Algebra and its Applications\u00a0415, 20\u201330 (2006)","journal-title":"Linear Algebra and its Applications"},{"key":"6_CR6","unstructured":"Dinculescu, M., Precup, D.: Approximate predictive representations of partially observable systems. In: Proceedings ICML (2010)"},{"key":"6_CR7","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. Journal of Machine Learning (2005)"},{"key":"6_CR8","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s10994-006-6226-1","volume":"63","author":"P. Geurts","year":"2006","unstructured":"Geurts, P., Ernst, D., Wehenkel, L.: Extremely randomized trees. Machine Learning\u00a063, 3\u201342 (2006)","journal-title":"Machine Learning"},{"key":"6_CR9","unstructured":"Gordon, G.J.: Approximate Solutions to Markov Decision Processes. Ph.D. thesis, School of Computer Science, Carnegie Mellon University (1999)"},{"key":"6_CR10","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1007\/978-3-540-68825-9_13","volume-title":"Advances in Artificial Intelligence","author":"M.T. Izadi","year":"2008","unstructured":"Izadi, M.T., Precup, D.: Point-Based Planning for Predictive State Representations. In: Bergler, S. (ed.) Canadian AI. LNCS (LNAI), vol.\u00a05032, pp. 126\u2013137. Springer, Heidelberg (2008)"},{"key":"6_CR11","unstructured":"James, M.R., Wessling, T., Vlassis, N.: Improving approximate value iteration using memories and predictive state representations. In: AAAI (2006)"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"James, M.R., Singh, S., Littman, M.L.: Planning with predictive state representations. In: International Conference on Machine Learning and Applications, pp. 304\u2013311 (2004)","DOI":"10.1109\/ICMLA.2004.1383528"},{"key":"6_CR13","unstructured":"Littman, M., Sutton, R., Singh, S.: Predictive representations of state. In: Advances in Neural Information Processing Systems, NIPS (2002)"},{"key":"6_CR14","unstructured":"McCallum, A.K.: Reinforcement Learning with Selective Perception and Hidden State. Ph.D. thesis, University of Rochester (1996)"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"McCracken, P., Bowling, M.: Online discovery and learning of predictive state representations. In: Neural Information Processing Systems, vol.\u00a018 (2006)","DOI":"10.1145\/1143844.1143861"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Nguyen, P., Sunehag, P., Hutter, M.: Feature reinforcement learning in practice. Tech. rep. (2011)","DOI":"10.1007\/978-3-642-29946-9_10"},{"key":"6_CR17","unstructured":"Poupart, P., Vlassis, N.: Model-based bayesian reinforcement learning in partially observable domains. In: Tenth International Symposium on Artificial Intelligence and Mathematics, ISAIM (2008)"},{"key":"6_CR18","unstructured":"Rafols, E.J., Ring, M., Sutton, R., Tanner, B.: Using predictive representations to improve generalization in reinforcement learning. In: IJCAI (2005)"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Rosencrantz, M., Gordon, G.J., Thrun, S.: Learning low dimensional predictive representations. In: Proceedings ICML (2004)","DOI":"10.1145\/1015330.1015441"},{"key":"6_CR20","first-page":"1655","volume":"12","author":"S. Ross","year":"2011","unstructured":"Ross, S., Pineau, J., Chaib-draa, B., Kreitmann, P.: A Bayesian approach for learning and planning in partially observable Markov decision processes. Journal of Machine Learning Research\u00a012, 1655\u20131696 (2011)","journal-title":"Journal of Machine Learning Research"},{"key":"6_CR21","unstructured":"Singh, S., James, M., Rudary, M.: Predictive state representations: A new theory for modeling dynamical systems. In: Proceedings UAI (2004)"},{"key":"6_CR22","unstructured":"Soni, V., Singh, S.: Abstraction in predictive state representations. In: AAAI (2007)"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. The MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"6_CR24","unstructured":"Talvitie, E., Singh, S.: Simple local models for complex dynamical systems. In: Advances in Neural Information Processing Systems, NIPS (2008)"},{"key":"6_CR25","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1613\/jair.3125","volume":"40","author":"J. Veness","year":"2011","unstructured":"Veness, J., Ng, K.S., Hutter, M., Uther, W., Silver, D.: A Monte-Carlo AIXI approximation. JAIR\u00a040, 95\u2013142 (2011)","journal-title":"JAIR"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_6.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T09:08:05Z","timestamp":1743152885000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}