{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T14:31:06Z","timestamp":1758810666219},"publisher-location":"Berlin, Heidelberg","reference-count":15,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540425366"},{"type":"electronic","value":"9783540447955"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2001]]},"DOI":"10.1007\/3-540-44795-4_28","type":"book-chapter","created":{"date-parts":[[2007,8,15]],"date-time":"2007-08-15T02:35:10Z","timestamp":1187145310000},"page":"324-335","source":"Crossref","is-referenced-by-count":14,"title":["DQL: A New Updating Strategy for Reinforcement Learning Based on Q-Learning"],"prefix":"10.1007","author":[{"given":"Carlos E.","family":"Mariano","sequence":"first","affiliation":[]},{"given":"Eduardo F.","family":"Morales","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2001,8,30]]},"reference":[{"key":"28_CR1","unstructured":"Boutilier, C.: Sequential Optimality and Coordination in Multi agent Systems, In Proc. of IJCAI-99, Stockholm, Sweden, 1999."},{"key":"28_CR2","unstructured":"Claus, C., Boutilier, C.: The Dynamics of Reinforcement Learning in Cooperative Multiagents Systems, In Proc. of AAAI-97 Multiagent Learning Workshop, pg. 13\u201318, Providence, 1997."},{"key":"28_CR3","volume-title":"Optimization, Learning, and Natural Algorithms","author":"M. Dorigo","year":"1992","unstructured":"Dorigo, M.: Optimization, Learning, and Natural Algorithms, PhD thesis, Politecnico da Milano, Italy, 1992."},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Gambardella, L., M., Dorigo, M.: Ant-Q: A reinforcement Learning Approach to the Traveling Salesman Problem, In Proceedings of the 12th International Conference on Machine Learning, pp. 252\u2013260, Morgan Kaufmann, 1995.","DOI":"10.1016\/B978-1-55860-377-6.50039-6"},{"key":"28_CR5","unstructured":"Hu, J., Wellman, M.: Multiagent Reinforcement Learning: Theoretical Framework and an Algorithm, In Proc. 15th Int. Conf. on Machine Learning, pp. 242\u2013250, Morgan Kaufmann, 1998."},{"key":"28_CR6","first-page":"45","volume-title":"Proc. Int. Workshop on Applications of Neural Networks to Telecommunications","author":"L. M","year":"1993","unstructured":"Littman, M., Boyan, J.: A Distributed Reinforcement Learning Scheme for Network Routing, In Proc. Int. Workshop on Applications of Neural Networks to Telecommunications, pp. 45\u201351, J. Alspector, et al., (eds.), Lawrence Erlbaum, Hillsdale, NJ, 1993."},{"key":"28_CR7","doi-asserted-by":"crossref","unstructured":"Littman, M.: Markov Games as a Framework for Multiagent Reinforcement Learning, In Proc. 11th Int. Conf. on Machine Learning, pp. 157\u2013163, New Brunswick, NJ, 1994, Morgan Kaufmann.","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"28_CR8","doi-asserted-by":"crossref","unstructured":"Mariano, C., Morales, E.: A New Distributed Reinforcement Learning Algorithm for the solution of Multiple Objective Optimization Problems, In O. Cairo et al., eds. Lecture Notes in Artificial Intelligence, 1793:212\u2013223, April 2000.","DOI":"10.1007\/10720076_20"},{"key":"28_CR9","first-page":"224","volume-title":"Proc. 2nd Int. Conf. an Genetic Algorithms","author":"I. Oliver","year":"1987","unstructured":"Oliver, I., Smith, D., Holland, J.R.: A study of Permutation Crossover Operators on the Traveling Salesman Problem, In Proc. 2nd Int. Conf. an Genetic Algorithms, pp. 224\u2013230, J.J. Grefenstette (ed.), Lawrence Erlbaum, Hillsdale, NJ, 1987."},{"key":"28_CR10","unstructured":"Price, B., Boutilier, C.: Implicit Imitation in Multiagent Reinforcement Learning, In Proc. 16th Int. Conf. on Machine Learning, pp., 1999."},{"key":"28_CR11","volume-title":"The Traveling Salesman: Computational Solutions for TSP Applications","author":"G. Reinelt","year":"1994","unstructured":"Reinelt, G.: The Traveling Salesman: Computational Solutions for TSP Applications, Springer Verlag, Berlin, 1994."},{"key":"28_CR12","volume-title":"Reinforcement Learning an Introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning an Introduction, MIT Press, Cambridge, MA, 1998."},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Tan, M.: Multiagent Reinforcement Learning: Independent vs. Cooperative Agents, In Proc. 10th Int. Conf. on Machine Learning, pp. 330\u2013337, Amherst, MA, 1993.","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"28_CR14","volume-title":"Learning from Delayed Rewards","author":"C. Watkins","year":"1978","unstructured":"Watkins, C.: Learning from Delayed Rewards. PhD thesis, Cambridge University, Cambridge, MA, 1978."},{"key":"28_CR15","first-page":"279","volume":"3","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., Dayan, P.: Q-Learning, Machine Learning, 3:279\u2013292, 1992.","journal-title":"Machine Learning"}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML 2001"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-44795-4_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T22:09:53Z","timestamp":1556748593000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-44795-4_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001]]},"ISBN":["9783540425366","9783540447955"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/3-540-44795-4_28","relation":{},"ISSN":["0302-9743"],"issn-type":[{"type":"print","value":"0302-9743"}],"subject":[],"published":{"date-parts":[[2001]]}}}