{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T16:11:04Z","timestamp":1746115864204,"version":"3.40.4"},"publisher-location":"Berlin, Heidelberg","reference-count":35,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642406539"},{"type":"electronic","value":"9783642406546"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-40654-6_11","type":"book-chapter","created":{"date-parts":[[2013,10,23]],"date-time":"2013-10-23T16:40:07Z","timestamp":1382546407000},"page":"172-188","source":"Crossref","is-referenced-by-count":0,"title":["Combining Learning Algorithms: An Approach to Markov Decision Processes"],"prefix":"10.1007","author":[{"given":"Richardson","family":"Ribeiro","sequence":"first","affiliation":[]},{"given":"F\u00e1bio","family":"Favarim","sequence":"additional","affiliation":[]},{"given":"Marco A. C.","family":"Barbosa","sequence":"additional","affiliation":[]},{"given":"Alessandro L.","family":"Koerich","sequence":"additional","affiliation":[]},{"given":"Fabr\u00edcio","family":"Enembreck","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"3\/4","key":"11_CR1","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1023\/A:1022676722315","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8(3\/4), 279\u2013292 (1992)","journal-title":"Mach. 
Learn."},{"key":"11_CR2","unstructured":"Ribeiro, C.H.C.: A tutorial on reinforcement learning techniques. In: Proceedings of International Joint Conference on Neural Networks, Washington, USA, pp. 59\u201361 (1999)"},{"issue":"3","key":"11_CR3","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro, G.: Temporal difference learning and td-gammon. Commun. ACM 38(3), 58\u201368 (1995)","journal-title":"Commun. ACM"},{"issue":"1","key":"11_CR4","first-page":"1633","volume":"10","author":"M Taylor","year":"2009","unstructured":"Taylor, M., Stone, P.: Using imagery to simplify perceptual abstraction in reinforcement learning agents. J. Mach. Learn. Res. (JMLR) 10(1), 1633\u20131685 (2009)","journal-title":"J. Mach. Learn. Res. (JMLR)"},{"key":"11_CR5","first-page":"2413","volume":"10","author":"AL Strehl","year":"2009","unstructured":"Strehl, A.L., Li, L., Littman, M.L.: Reinforcement learning in finite mdps: Pac analysis. J. Mach. Learn. Res. (JMLR) 10, 2413\u20132444 (2009)","journal-title":"J. Mach. Learn. Res. (JMLR)"},{"issue":"24","key":"11_CR6","doi-asserted-by":"publisher","first-page":"981","DOI":"10.1002\/int.20435","volume":"25","author":"M Stula","year":"2010","unstructured":"Stula, M., Stipanicev, D., Bodrozic, L.: Intelligent modeling with agent-based fuzzy cognitive map. Int. J. Intell. Syst. 25(24), 981\u20131004 (2010)","journal-title":"Int. J. Intell. Syst."},{"key":"11_CR7","doi-asserted-by":"crossref","unstructured":"Walsh, T.J., Goschin, S., Littman, M.L.: Integrating sample-based planning and model-based reinforcement learning. In: Proceedings of 14th Conference on Artificial Intelligence (AAAI\u201910), vol. 1 (2010)","DOI":"10.1609\/aaai.v24i1.7689"},{"key":"11_CR8","unstructured":"Zhang, C., Lesser, V., Abdallah, S.: Self-organization for coordinating decentralized reinforcement learning. 
In: Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems. AAMAS\u201910, International Foundation for Autonomous Agents and Multiagent Systems, pp. 739\u2013746 (2010)"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Wintermute, S.: Using imagery to simplify perceptual abstraction in reinforcement learning agents. In: Proceedings of 24th Conference on Artificial Intelligence (AAAI\u201910), Atlanta, Georgia, USA, pp. 1567\u20131573 (2010)","DOI":"10.1609\/aaai.v24i1.7570"},{"key":"11_CR10","doi-asserted-by":"publisher","first-page":"569","DOI":"10.1613\/jair.898","volume":"19","author":"B Price","year":"2003","unstructured":"Price, B., Boutilier, C.: Accelerating reinforcement learning through implicit imitation. J. Artif. Intell. Res. 19, 569\u2013629 (2003)","journal-title":"J. Artif. Intell. Res."},{"key":"11_CR11","first-page":"245","volume-title":"SBIA 2004. LNCS (LNAI), vol. 3171","author":"RAC Bianchi","year":"2004","unstructured":"Bianchi, R.A.C., Ribeiro, C.H.C., Costa, A.H.R.: Heuristically accelerated Q\u2013learning: A new approach to speed up reinforcement learning. In: Bazzan, A.L.C., Labidi, S. (eds.) SBIA 2004. LNCS (LNAI), vol. 3171, pp. 245\u2013254. Springer, Heidelberg (2004)"},{"key":"11_CR12","unstructured":"Comanici, G., Precup, D.: Optimal policy switching algorithms for reinforcement learning. In: Proceedings of 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS\u201910), pp. 709\u2013714 (2010)"},{"key":"11_CR13","unstructured":"Banerjee, B., Kraemer, L.: Action discovery for reinforcement learning. In: Proceedings of 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS\u201910), pp. 585\u20131586 (2010)"},{"key":"11_CR14","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. 
MIT Press, Cambridge (1998)"},{"key":"11_CR15","first-page":"268","volume-title":"IBERAMIA-SBIA 2006. LNCS (LNAI), vol. 4140","author":"R Ribeiro","year":"2006","unstructured":"Ribeiro, R., Enembreck, F., Koerich, A.L.: A hybrid learning strategy for discovery of policies of action. In: Sichman, J.S., Coelho, H., Rezende, S.O. (eds.) IBERAMIA-SBIA 2006. LNCS (LNAI), vol. 4140, pp. 268\u2013277. Springer, Heidelberg (2006)"},{"key":"11_CR16","unstructured":"Jordan, P.R., Schvartzman, L.J., Wellman, M.P.: Strategy exploration in empirical games. In: Proceedings of 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS\u201910), Toronto, Canada, vol. 1, pp. 1131\u20131138 (2010)"},{"key":"11_CR17","unstructured":"Amato, C., Shani, G.: High-level reinforcement learning in strategy games. In: Proceedings of 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS\u201910), pp. 75\u201382 (2010)"},{"key":"11_CR18","unstructured":"Spaan, M.T.J., Melo, F.S.: Interaction-driven markov games for decentralized multiagent planning under uncertainty. In: Proceedings of 7th International Conference on AAMAS, Estoril, Portugal, pp. 525\u2013532 (2008)"},{"key":"11_CR19","doi-asserted-by":"crossref","unstructured":"Mohammadian, M.: Multi-agents systems for intelligent control of traffic signals. In: Proceedings of International Conference on Computational Intelligence for Modelling Control and Automation and International Conference on Intelligent Agents Web Technologies and International Commerce, Sydney, Australia, p. 270 (2006)","DOI":"10.1109\/CIMCA.2006.152"},{"key":"11_CR20","doi-asserted-by":"crossref","unstructured":"Le, T., Cai, C.: A new feature for approximate dynamic programming traffic light controller. In: Proceedings of 2nd International Workshop on Computational Transportation Science (IWCTS\u201910), San Jose, CA, USA, pp. 
29\u201334 (2010)","DOI":"10.1145\/1899441.1899450"},{"key":"11_CR21","unstructured":"Sislak, D., Samek, J., Pechoucek, M.: Decentralized algorithms for collision avoidance in airspace. In: Proceedings of 7th International Conference on AAMAS, Estoril, Portugal, pp. 543\u2013550 (2008)"},{"issue":"28","key":"11_CR22","first-page":"460","volume":"25","author":"D Dimitrakiev","year":"2010","unstructured":"Dimitrakiev, D., Nikolova, N., Tenekedjiev, K.: Simulation and discrete event optimization for automated decisions for in-queue flights. Int. J. Intell. Syst. 25(28), 460\u2013487 (2010)","journal-title":"Int. J. Intell. Syst."},{"key":"11_CR23","unstructured":"Firby, R.J.: Adaptive execution in complex dynamic worlds. Ph.D. thesis, Yale University (1989)"},{"issue":"18","key":"11_CR24","doi-asserted-by":"publisher","first-page":"844","DOI":"10.1002\/int.20363","volume":"24","author":"D Pelta","year":"2009","unstructured":"Pelta, D., Cruz, C., Gonz\u00e1lez, J.: A study on diversity and cooperation in a multiagent strategy for dynamic optimization problems. Int. J. Intell. Syst. 24(18), 844\u2013861 (2009)","journal-title":"Int. J. Intell. Syst."},{"key":"11_CR25","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1613\/jair.904","volume":"16","author":"C Drummond","year":"2002","unstructured":"Drummond, C.: Accelerating reinforcement learning by composing solutions of automatically identified subtask. J. Artif. Intell. Res. 16, 59\u2013104 (2002)","journal-title":"J. Artif. Intell. Res."},{"key":"11_CR26","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-0891-5","volume-title":"State value learning with an anticipatory learning classifier system in a markov decision process","author":"M Butz","year":"2002","unstructured":"Butz, M.: State value learning with an anticipatory learning classifier system in a markov decision process. 
Technical report, Illinois Genetic Algorithms Laboratory (2002)"},{"issue":"1\/3","key":"11_CR27","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1007\/BF00114729","volume":"22","author":"S Koenig","year":"1996","unstructured":"Koenig, S., Simmons, R.G.: The effect of representation and knowledge on goal-directed exploration with reinforcement learning algorithms. Mach. Learn. 22(1\/3), 227\u2013250 (1996)","journal-title":"Mach. Learn."},{"key":"11_CR28","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1007\/s10732-007-9031-5","volume":"14","author":"RAC Bianchi","year":"2008","unstructured":"Bianchi, R.A.C., Ribeiro, C.H.C., Costa, A.H.R.: Accelerating autonomous learning by using heuristic selection of actions. J. Heuristics 14, 135\u2013168 (2008)","journal-title":"J. Heuristics"},{"issue":"3","key":"11_CR29","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1109\/34.667881","volume":"20","author":"J Kittler","year":"1998","unstructured":"Kittler, J., Hatef, M., Duin, R.P.W., Matas, J.: On combining classifiers. IEEE Trans. Pattern Analysis Mach. Intell. 20(3), 226\u2013239 (1998)","journal-title":"IEEE Trans. Pattern Analysis Mach. Intell."},{"issue":"1","key":"11_CR30","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/BF00153759","volume":"6","author":"DW Aha","year":"1991","unstructured":"Aha, D.W., Kibler, D., Albert, M.K.: Instance-based learning algorithms. Mach. Learn. 6(1), 37\u201366 (1991)","journal-title":"Mach. Learn."},{"issue":"8","key":"11_CR31","doi-asserted-by":"publisher","first-page":"773","DOI":"10.1002\/int.20493","volume":"26","author":"I Galv\u00e1n","year":"2011","unstructured":"Galv\u00e1n, I., Valls, J., Garc\u00eda, M., Isasi, P.: A lazy learning approach for building classification models. Int. J. Intell. Syst. 26(8), 773\u2013786 (2011)","journal-title":"Int. J. Intell. 
Syst."},{"key":"11_CR32","doi-asserted-by":"publisher","first-page":"861","DOI":"10.1080\/08839510701526954","volume":"21","author":"F Enembreck","year":"2007","unstructured":"Enembreck, F., Avila, B.C., Scalabrini, E.E., Barthes, J.P.A.: Learning drifting negotiations. Appl. Artif. Intell. 21, 861\u2013881 (2007)","journal-title":"Appl. Artif. Intell."},{"key":"11_CR33","doi-asserted-by":"crossref","unstructured":"Pegoraro, R., Costa, A.H.R., Ribeiro, C.H.C.: Experience generalization for multi-agent reinforcement learning. In: Proceedings of XXI International Conference of the Chilean Computer Science Society, Punta Arenas, Chile, pp. 233\u2013239 (2001)","DOI":"10.1109\/SCCC.2001.972652"},{"key":"11_CR34","doi-asserted-by":"crossref","unstructured":"Ribeiro, R., Borges, A.P., Enembreck, F.: Interaction models for multiagent reinforcement learning. In: Proceedings of International Conferences on Computational Intelligence for Modelling, Control and Automation; Intelligent Agents, Web Technologies and Internet Commerce; and Innovation in Software Engineering, Vienna, Austria, pp. 464\u2013469 (2008)","DOI":"10.1109\/CIMCA.2008.98"},{"key":"11_CR35","doi-asserted-by":"publisher","first-page":"133","DOI":"10.22456\/2175-2745.8611","volume":"18","author":"R Ribeiro","year":"2011","unstructured":"Ribeiro, R., Borges, A.P., Ronszcka, A.F., Scalabrin, E., Avila, B.C., Enembreck, F.: Combinando modelos de intera\u00e7\u00e3o para melhorar a coordena\u00e7\u00e3o em sistemas multiagente. 
Revista de Inform\u00e1tica Te\u00f3rica e Aplicada 18, 133\u2013157 (2011)","journal-title":"Revista de Inform\u00e1tica Te\u00f3rica e Aplicada"}],"container-title":["Lecture Notes in Business Information Processing","Enterprise Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-40654-6_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T18:07:42Z","timestamp":1746036462000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-40654-6_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642406539","9783642406546"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-40654-6_11","relation":{},"ISSN":["1865-1348","1865-1356"],"issn-type":[{"type":"print","value":"1865-1348"},{"type":"electronic","value":"1865-1356"}],"subject":[],"published":{"date-parts":[[2013]]}}}