{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T07:20:44Z","timestamp":1778656844491,"version":"3.51.4"},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"2-3","license":[{"start":{"date-parts":[[2002,11,1]],"date-time":"2002-11-01T00:00:00Z","timestamp":1036108800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2002,11,1]],"date-time":"2002-11-01T00:00:00Z","timestamp":1036108800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Learning"],"published-print":{"date-parts":[[2002,11]]},"DOI":"10.1023\/a:1017988514716","type":"journal-article","created":{"date-parts":[[2002,12,30]],"date-time":"2002-12-30T09:36:44Z","timestamp":1041241004000},"page":"247-265","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":80,"title":["Continuous-Action Q-Learning"],"prefix":"10.1007","volume":"49","author":[{"given":"Jos\u00e9 del R.","family":"Mill\u00e1n","sequence":"first","affiliation":[]},{"given":"Daniele","family":"Posenato","sequence":"additional","affiliation":[]},{"given":"Eric","family":"Dedieu","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"395111_CR1","doi-asserted-by":"crossref","unstructured":"Baird, L. C. (1995). Residual algorithms: Reinforcement learning with function approximation. In Proceedings of the 12th International Conference on Machine Learning (pp. 30-37).","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"395111_CR2","first-page":"835","volume":"13","author":"A. G. Barto","year":"1983","unstructured":"Barto, A. G., Sutton, R. 
S., & Anderson, C. W. (1983). Neuronlike elements that can solve difficult learning control problems. IEEE Transactions on Systems, Man, and Cybernetics, 13, 835\u2013846.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"395111_CR3","unstructured":"Dedieu, E., & Mill\u00e1n, J. del R. (1998). Efficient occupancy grids for variable resolution map building. In Proceedings of the 6th International Symposium on Intelligent Robotic Systems (pp. 195-203)."},{"key":"395111_CR4","first-page":"625","volume":"7","author":"B. Fritzke","year":"1995","unstructured":"Fritzke, B. (1995). A growing neural gas network learns topologies. In Advances in neural information processing systems 7 (pp. 625\u2013632).","journal-title":"Advances in neural information processing systems"},{"key":"395111_CR5","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-97966-8","volume-title":"Self-organizing maps","author":"T. Kohonen","year":"1997","unstructured":"Kohonen, T. (1997). Self-organizing maps (2nd edn.). Berlin: Springer-Verlag.","edition":"2nd edn."},{"key":"395111_CR6","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1023\/A:1022628806385","volume":"8","author":"L.-J. Lin","year":"1992","unstructured":"Lin, L.-J. (1992). Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine Learning, 8, 293\u2013321.","journal-title":"Machine Learning"},{"key":"395111_CR7","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1023\/A:1018064306595","volume":"22","author":"S. Mahadevan","year":"1996","unstructured":"Mahadevan, S. (1996). Average reward reinforcement learning: Foundations, algorithms, and empirical results. Machine Learning, 22, 159\u2013195.","journal-title":"Machine Learning"},{"key":"395111_CR8","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1016\/S0893-6080(97)00137-8","volume":"11","author":"P. Mart\u00edn","year":"1998","unstructured":"Mart\u00edn, P., & Mill\u00e1n, J. del R. (1998). 
Learning reaching strategies through reinforcement for a sensor-based manipulator. Neural Networks, 11, 359\u2013376.","journal-title":"Neural Networks"},{"key":"395111_CR9","doi-asserted-by":"crossref","first-page":"304","DOI":"10.1109\/70.143349","volume":"8","author":"M. J. Matari\u0107","year":"1992","unstructured":"Matari\u0107, M. J. (1992). Integration of representation into goal-driven behavior-based robots. IEEE Transactions on Robotics and Automation, 8, 304\u2013312.","journal-title":"IEEE Transactions on Robotics and Automation"},{"key":"395111_CR10","volume-title":"A reinforcement connectionist learning approach to robot path finding","author":"J. d. R. Mill\u00e1n","year":"1992","unstructured":"Mill\u00e1n, J. del R. (1992). A reinforcement connectionist learning approach to robot path finding. Ph.D. Thesis, Software Dept., Universitat Polit\u00e8cnica de Catalunya, Barcelona, Spain."},{"key":"395111_CR11","doi-asserted-by":"crossref","first-page":"408","DOI":"10.1109\/3477.499792","volume":"26","author":"J. d. R. Mill\u00e1n","year":"1996","unstructured":"Mill\u00e1n, J. del R. (1996). Rapid, safe, and incremental learning of navigation strategies. IEEE Transactions on Systems, Man, and Cybernetics-Part B, 26, 408\u2013420.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics-Part B"},{"key":"395111_CR12","doi-asserted-by":"crossref","unstructured":"Mill\u00e1n, J. del R. (1997). Incremental acquisition of local networks for the control of autonomous robots. In Proceedings of the 7th International Conference on Artificial Neural Networks (pp. 739-744).","DOI":"10.1007\/BFb0020242"},{"key":"395111_CR13","first-page":"363","volume":"8","author":"J. d. R. Mill\u00e1n","year":"1992","unstructured":"Mill\u00e1n, J. del R., & Torras, C. (1992). A reinforcement connectionist approach to robot path finding in non-mazelike environments. 
Machine Learning, 8, 363\u2013395.","journal-title":"Machine Learning"},{"key":"395111_CR14","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1177\/105971239700600201","volume":"6","author":"J. C. Santamar\u00eda","year":"1998","unstructured":"Santamar\u00eda, J. C., Sutton, R. S., & Ram, A. (1998). Experiments with reinforcement learning in problems with continuous state and action spaces. Adaptive Behavior, 6, 163\u2013217.","journal-title":"Adaptive Behavior"},{"key":"395111_CR15","doi-asserted-by":"crossref","unstructured":"Schwartz, A. (1993). A reinforcement learning method for maximizing undiscounted rewards. In Proceedings of the 10th International Conference on Machine Learning (pp. 298-305).","DOI":"10.1016\/B978-1-55860-307-3.50045-9"},{"key":"395111_CR16","first-page":"123","volume":"22","author":"S. P. Singh","year":"1996","unstructured":"Singh, S. P., & Sutton, R. S. (1996). Reinforcement learning with replacing eligibility traces. Machine Learning, 22, 123\u2013158.","journal-title":"Machine Learning"},{"key":"395111_CR17","first-page":"9","volume":"3","author":"R. S. Sutton","year":"1988","unstructured":"Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Machine Learning, 3, 9\u201344.","journal-title":"Machine Learning"},{"key":"395111_CR18","unstructured":"Sutton, R. S. (1996). Generalization in reinforcement learning: Successful examples using sparse coarse coding. In Advances in neural information processing systems 8 (pp. 1038-1044)."},{"key":"395111_CR19","volume-title":"Reinforcement learning: An introduction","author":"R. S. Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction. Cambridge, MA: MIT Press."},{"key":"395111_CR20","doi-asserted-by":"crossref","first-page":"177","DOI":"10.1016\/S0004-3702(98)00002-2","volume":"100","author":"P. Tadepalli","year":"1998","unstructured":"Tadepalli, P., & Ok, D. (1998). 
Model-based average reward reinforcement learning. Artificial Intelligence, 100, 177\u2013224.","journal-title":"Artificial Intelligence"},{"key":"395111_CR21","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1016\/0921-8890(95)00005-Z","volume":"15","author":"C. L. Tham","year":"1995","unstructured":"Tham, C. L. (1995). Reinforcement learning of multiple tasks using a hierarchical CMAC architecture. Robotics and Autonomous Systems, 15, 247\u2013274.","journal-title":"Robotics and Autonomous Systems"},{"key":"395111_CR22","first-page":"527","volume-title":"Handbook of intelligent control: Neural, fuzzy and adaptive approaches","author":"S. B. Thrun","year":"1992","unstructured":"Thrun, S. B. (1992). The role of exploration in learning control. In D. A. White & D. A. Sofge (Eds.), Handbook of intelligent control: Neural, fuzzy and adaptive approaches (pp. 527\u2013559). New York: Van Nostrand Reinhold."},{"key":"395111_CR23","volume-title":"Learning with delayed rewards","author":"C. J. C. H. Watkins","year":"1989","unstructured":"Watkins, C. J. C. H. (1989). Learning with delayed rewards. Ph.D. 
Thesis, Cambridge University, England, UK."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1017988514716.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1017988514716\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1017988514716.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T11:42:19Z","timestamp":1752147739000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1017988514716"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002,11]]},"references-count":23,"journal-issue":{"issue":"2-3","published-print":{"date-parts":[[2002,11]]}},"alternative-id":["395111"],"URL":"https:\/\/doi.org\/10.1023\/a:1017988514716","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2002,11]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}