{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T00:47:18Z","timestamp":1771548438931,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":33,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540729259","type":"print"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-72927-3_23","type":"book-chapter","created":{"date-parts":[[2007,6,12]],"date-time":"2007-06-12T02:30:27Z","timestamp":1181615427000},"page":"308-322","source":"Crossref","is-referenced-by-count":43,"title":["Q-Learning with Linear Function Approximation"],"prefix":"10.1007","author":[{"given":"Francisco S.","family":"Melo","sequence":"first","affiliation":[]},{"given":"M. Isabel","family":"Ribeiro","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"23_CR1","first-page":"9","volume":"3","author":"R. Sutton","year":"1988","unstructured":"Sutton, R.: Learning to predict by the methods of temporal differences. Machine Learning\u00a03, 9\u201344 (1988)","journal-title":"Machine Learning"},{"key":"23_CR2","unstructured":"Watkins, C.: Learning from delayed rewards. PhD thesis, King\u2019s College, University of Cambridge (May 1989)"},{"key":"23_CR3","unstructured":"Rummery, G., Niranjan, M.: On-line Q-learning using connectionist systems. Technical Report CUED\/F-INFENG\/TR 166, Cambridge University Engineering Department (1994)"},{"issue":"4","key":"23_CR4","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1145\/122344.122377","volume":"2","author":"R. Sutton","year":"1991","unstructured":"Sutton, R.: DYNA, an integrated architecture for learning, planning, and reacting. ACM SIGART Bulletin\u00a02(4), 160\u2013163 (1991)","journal-title":"ACM SIGART Bulletin"},{"key":"23_CR5","unstructured":"Barto, A., Bradtke, S., Singh, S.: Learning to act using real-time dynamic programming. Technical Report UM-CS-1993-002, Department of Computer Science, University of Massachusetts at Amherst (1993)"},{"key":"23_CR6","unstructured":"Boyan, J.: Least-squares temporal difference learning. In: Proc. 16th Int. Conf. Machine Learning, 49\u201356 (1999)"},{"key":"23_CR7","unstructured":"Bertsekas, D., Tsitsiklis, J.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"23_CR8","first-page":"1038","volume":"8","author":"R. Sutton","year":"1996","unstructured":"Sutton, R.: Generalization in reinforcement learning: Successful examples using sparse coarse coding. Advances in Neural Information Processing Systems\u00a08, 1038\u20131044 (1996)","journal-title":"Advances in Neural Information Processing Systems"},{"key":"23_CR9","first-page":"369","volume":"7","author":"J. Boyan","year":"1994","unstructured":"Boyan, J., Moore, A.: Generalization in reinforcement learning: Safely approximating the value function. Advances in Neural Information Processing Systems\u00a07, 369\u2013376 (1994)","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"2","key":"23_CR10","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G. Tesauro","year":"1994","unstructured":"Tesauro, G.: TD-Gammon, a self-teaching backgammon program, achieves master-level play. Neural Computation\u00a06(2), 215\u2013219 (1994)","journal-title":"Neural Computation"},{"key":"23_CR11","first-page":"361","volume":"7","author":"S. Singh","year":"1994","unstructured":"Singh, S., Jaakkola, T., Jordan, M.: Reinforcement learning with soft state aggregation. Advances in Neural Information Processing Systems\u00a07, 361\u2013368 (1994)","journal-title":"Advances in Neural Information Processing Systems"},{"key":"23_CR12","unstructured":"Gordon, G.: Stable function approximation in dynamic programming. Technical Report CMU-CS-95-103, School of Computer Science, Carnegie Mellon University (1995)"},{"key":"23_CR13","first-page":"59","volume":"22","author":"J. Tsitsiklis","year":"1996","unstructured":"Tsitsiklis, J., Van Roy, B.: Feature-based methods for large scale dynamic programming. Machine Learning\u00a022, 59\u201394 (1996)","journal-title":"Machine Learning"},{"key":"23_CR14","unstructured":"Precup, D., Sutton, R., Dasgupta, S.: Off-policy temporal-difference learning with function approximation. In: Proc. 18th Int. Conf. Machine Learning, 417\u2013424 (2001)"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Szepesv\u00e1ri, C., Smart, W.: Interpolation-based Q-learning. In: Proc. 21st Int. Conf. Machine learning, 100\u2013107 (2004)","DOI":"10.1145\/1015330.1015445"},{"issue":"5","key":"23_CR16","doi-asserted-by":"crossref","first-page":"674","DOI":"10.1109\/9.580874","volume":"AC-42","author":"J. Tsitsiklis","year":"1996","unstructured":"Tsitsiklis, J., Van Roy, B.: An analysis of temporal-difference learning with function approximation. IEEE Transactions on Automatic Control\u00a0AC-42(5), 674\u2013690 (1996)","journal-title":"IEEE Transactions on Automatic Control"},{"key":"23_CR17","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1017\/S0269964800142081","volume":"14","author":"V. Borkar","year":"2000","unstructured":"Borkar, V.: A learning algorithm for discrete-time stochastic control. Probability in the Engineering and Informational Sciences\u00a014, 243\u2013258 (2000)","journal-title":"Probability in the Engineering and Informational Sciences"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Melo, F., Ribeiro, M.I.: Q-learning with linear function approximation. Technical Report RT-602-07, Institute for Systems and Robotics (March 2007)","DOI":"10.23919\/ECC.2007.7068926"},{"key":"23_CR19","first-page":"279","volume":"8","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., Dayan, P.: Technical note: Q-learning. Machine Learning\u00a08, 279\u2013292 (1992)","journal-title":"Machine Learning"},{"key":"23_CR20","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-3267-7","volume-title":"Markov Chains and Stochastic Stability","author":"S. Meyn","year":"1993","unstructured":"Meyn, S., Tweedie, R.: Markov Chains and Stochastic Stability. Springer, Heidelberg (1993)"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Baird, L.: Residual algorithms: Reinforcement learning with function approximation. In: Proc. 12th Int. Conf. Machine Learning, 30\u201337 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"23_CR22","unstructured":"Bertsekas, D., Borkar, V., Nedi\u0107, A.: 9. In: Improved temporal difference methods with linear function approximation. Wiley Publishers, 235\u2013260 (2004)"},{"key":"23_CR23","unstructured":"Baker, W.: Learning via stochastic approximation in function space. PhD Thesis (1997)"},{"key":"23_CR24","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1613\/jair.714","volume":"14","author":"C. Lusena","year":"2001","unstructured":"Lusena, C., Goldsmith, J., Mundhenk, M.: Nonapproximability results for partially observable Markov decision processes. J. Artificial Intelligence Research\u00a014, 83\u2013103 (2001)","journal-title":"J. Artificial Intelligence Research"},{"issue":"3","key":"23_CR25","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1287\/moor.12.3.441","volume":"12","author":"C. Papadimitriou","year":"1987","unstructured":"Papadimitriou, C., Tsitsiklis, J.: The complexity of Markov chain decision processes. Mathematics of Operations Research\u00a012(3), 441\u2013450 (1987)","journal-title":"Mathematics of Operations Research"},{"key":"23_CR26","unstructured":"Cassandra, A.: Exact and approximate algorithms for partially observable Markov decision processes. PhD thesis, Brown University (May 1998)"},{"key":"23_CR27","unstructured":"Aberdeen, D.: A (revised) survey of approximate methods for solving partially observable Markov decision processes. Technical report, National ICT Australia, Canberra, Australia (2003)"},{"key":"23_CR28","doi-asserted-by":"crossref","unstructured":"Littman, M., Cassandra, A., Kaelbling, L.: Learning policies for partially observable environments: Scaling up. In: Proc. 12th Int. Conf. Machine Learning, 362\u2013370 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50052-9"},{"key":"23_CR29","unstructured":"Parr, R., Russell, S.: Approximating optimal policies for partially observable stochastic domains. In: Proc. Int. Joint Conf. Artificial Intelligence, 1088\u20131094 (1995)"},{"key":"23_CR30","unstructured":"He, Q., Shayman, M.: Solving POMDPs by on-policy linear approximate learning algorithm. In: Proc. Conf. Information Sciences and Systems (2000)"},{"key":"23_CR31","unstructured":"Glaubius, R., Smart, W.: Manifold representations for value-function approximation in reinforcement learning. Technical Report 05-19, Department of Computer Science and Engineering, Washington University in St. Louis (2005)"},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Keller, P., Mannor, S., Precup, D.: Automatic basis function construction for approximate dynamic programming and reinforcement learning. In: Proc. 23rd Int. Conf. Machine Learning, 449\u2013456 (2006)","DOI":"10.1145\/1143844.1143901"},{"issue":"1","key":"23_CR33","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/s10479-005-5732-z","volume":"134","author":"I. Menache","year":"2005","unstructured":"Menache, I., Mannor, S., Shimkin, N.: Basis function adaptation in temporal difference reinforcement learning. Annals of Operations Research\u00a0134(1), 215\u2013238 (2005)","journal-title":"Annals of Operations Research"}],"container-title":["Lecture Notes in Computer Science","Learning Theory"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-72927-3_23.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,19]],"date-time":"2020-11-19T05:08:01Z","timestamp":1605762481000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-72927-3_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540729259"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-72927-3_23","relation":{},"subject":[]}}