{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T18:43:46Z","timestamp":1780512226177,"version":"3.54.1"},"publisher-location":"Berlin, Heidelberg","reference-count":90,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_15","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"471-503","source":"Crossref","is-referenced-by-count":64,"title":["Decentralized POMDPs"],"prefix":"10.1007","author":[{"given":"Frans A.","family":"Oliehoek","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Abdallah, S., Lesser, V.: Multiagent reinforcement learning and self-organization in a network of agents. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 172\u2013179 (2007)","DOI":"10.1145\/1329125.1329172"},{"key":"15_CR2","unstructured":"Amato, C., Carlin, A., Zilberstein, S.: Bounded dynamic programming for decentralized POMDPs. In: Proc. of the AAMAS Workshop on Multi-Agent Sequential Decision Making in Uncertain Domains, MSDM (2007)"},{"key":"15_CR3","doi-asserted-by":"crossref","unstructured":"Amato, C., Dibangoye, J.S., Zilberstein, S.: Incremental policy generation for finite-horizon DEC-POMDPs. In: Proc. of the International Conference on Automated Planning and Scheduling, pp. 2\u20139 (2009)","DOI":"10.1609\/icaps.v19i1.13355"},{"issue":"3","key":"15_CR4","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1007\/s10458-009-9103-z","volume":"21","author":"C. Amato","year":"2010","unstructured":"Amato, C., Bernstein, D.S., Zilberstein, S.: Optimizing fixed-size stochastic controllers for POMDPs and decentralized POMDPs. Autonomous Agents and Multi-Agent Systems\u00a021(3), 293\u2013320 (2010)","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"15_CR5","unstructured":"Aras, R., Dutech, A., Charpillet, F.: Mixed integer linear programming for exact finite-horizon planning in decentralized POMDPs. In: Proc. of the International Conference on Automated Planning and Scheduling (2007)"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Becker, R., Zilberstein, S., Lesser, V.: Decentralized Markov decision processes with event-driven interactions. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 302\u2013309 (2004a)","DOI":"10.1145\/860575.860583"},{"key":"15_CR7","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1613\/jair.1497","volume":"22","author":"R. Becker","year":"2004","unstructured":"Becker, R., Zilberstein, S., Lesser, V., Goldman, C.V.: Solving transition independent decentralized Markov decision processes. Journal of Artificial Intelligence Research\u00a022, 423\u2013455 (2004b)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Becker, R., Lesser, V., Zilberstein, S.: Analyzing myopic approaches for multi-agent communication. In: Proc. of the International Conference on Intelligent Agent Technology, pp. 550\u2013557 (2005)","DOI":"10.1109\/IAT.2005.44"},{"issue":"4","key":"15_CR9","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"D.S. Bernstein","year":"2002","unstructured":"Bernstein, D.S., Givan, R., Immerman, N., Zilberstein, S.: The complexity of decentralized control of Markov decision processes. Mathematics of Operations Research\u00a027(4), 819\u2013840 (2002)","journal-title":"Mathematics of Operations Research"},{"key":"15_CR10","unstructured":"Bernstein, D.S., Hansen, E.A., Zilberstein, S.: Bounded policy iteration for decentralized POMDPs. In: Proc. of the International Joint Conference on Artificial Intelligence, pp. 1287\u20131292 (2005)"},{"key":"15_CR11","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1613\/jair.2667","volume":"34","author":"D.S. Bernstein","year":"2009","unstructured":"Bernstein, D.S., Amato, C., Hansen, E.A., Zilberstein, S.: Policy iteration for decentralized control of Markov decision processes. Journal of Artificial Intelligence Research\u00a034, 89\u2013132 (2009)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"15_CR12","unstructured":"Boularias, A., Chaib-draa, B.: Exact dynamic programming for decentralized POMDPs with lossless policy compression. In: Proc. of the International Conference on Automated Planning and Scheduling (2008)"},{"key":"15_CR13","unstructured":"Boutilier, C.: Planning, learning and coordination in multiagent decision processes. In: Proc. of the 6th Conference on Theoretical Aspects of Rationality and Knowledge, pp. 195\u2013210 (1996)"},{"key":"15_CR14","unstructured":"Boyan, J.A., Littman, M.L.: Packet routing in dynamically changing networks: A reinforcement learning approach. In: Advances in Neural Information Processing Systems, vol.\u00a06, pp. 671\u2013678 (1993)"},{"issue":"2","key":"15_CR15","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L. Bu\u015foniu","year":"2008","unstructured":"Bu\u015foniu, L., Babu\u0161ka, R., De Schutter, B.: A comprehensive survey of multi-agent reinforcement learning. IEEE Transactions on Systems, Man, and Cybernetics, Part C: Applications and Reviews\u00a038(2), 156\u2013172 (2008)","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part C: Applications and Reviews"},{"key":"15_CR16","unstructured":"Carlin, A., Zilberstein, S.: Value-based observation compression for DEC-POMDPs. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 501\u2013508 (2008)"},{"key":"15_CR17","unstructured":"Chang, Y.H., Ho, T.: Mobilized ad-hoc networks: A reinforcement learning approach. In: Proceedings of the First International Conference on Autonomic Computing, pp. 240\u2013247 (2004)"},{"key":"15_CR18","unstructured":"Chang, Y.H., Ho, T., Kaelbling, L.P.: All learning is local: Multi-agent learning in global reward games. In: Advances in Neural Information Processing Systems, vol. 16 (2004)"},{"key":"15_CR19","unstructured":"Claus, C., Boutilier, C.: The dynamics of reinforcement learning in cooperative multiagent systems. In: Proc. of the National Conference on Artificial Intelligence, pp. 746\u2013752 (1998)"},{"key":"15_CR20","unstructured":"Cogill, R., Rotkowitz, M., Roy, B.V., Lall, S.: An approximate dynamic programming approach to decentralized control of stochastic systems. In: Proc. of the 2004 Allerton Conference on Communication, Control, and Computing (2004)"},{"issue":"2-3","key":"15_CR21","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1007518724497","volume":"33","author":"R.H. Crites","year":"1998","unstructured":"Crites, R.H., Barto, A.G.: Elevator group control using multiple reinforcement learning agents. Machine Learning\u00a033(2-3), 235\u2013262 (1998)","journal-title":"Machine Learning"},{"key":"15_CR22","unstructured":"Dibangoye, J.S., Mouaddib, A.I., Chai-draa, B.: Point-based incremental pruning heuristic for solving finite-horizon DEC-POMDPs. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 569\u2013576 (2009)"},{"key":"15_CR23","doi-asserted-by":"crossref","unstructured":"Eker, B., Ak\u0131n, H.L.: Using evolution strategies to solve DEC-POMDP problems. Soft Computing - A Fusion of Foundations, Methodologies and Applications (2008)","DOI":"10.1007\/s00500-008-0388-7"},{"key":"15_CR24","unstructured":"Emery-Montemerlo, R., Gordon, G., Schneider, J., Thrun, S.: Approximate solutions for partially observable stochastic games with common payoffs. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 136\u2013143 (2004)"},{"key":"15_CR25","unstructured":"Emery-Montemerlo, R., Gordon, G., Schneider, J., Thrun, S.: Game theoretic control for robot teams. In: Proc. of the IEEE International Conference on Robotics and Automation, pp. 1175\u20131181 (2005)"},{"key":"15_CR26","doi-asserted-by":"crossref","unstructured":"Goldman, C.V., Zilberstein, S.: Optimizing information exchange in cooperative multi-agent systems. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 137\u2013144 (2003)","DOI":"10.1145\/860575.860598"},{"key":"15_CR27","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1613\/jair.1427","volume":"22","author":"C.V. Goldman","year":"2004","unstructured":"Goldman, C.V., Zilberstein, S.: Decentralized control of cooperative systems: Categorization and complexity analysis. Journal of Artificial Intelligence Research\u00a022, 143\u2013174 (2004)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"15_CR28","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1613\/jair.2466","volume":"32","author":"C.V. Goldman","year":"2008","unstructured":"Goldman, C.V., Zilberstein, S.: Communication-based decomposition mechanisms for decentralized MDPs. Journal of Artificial Intelligence Research\u00a032, 169\u2013202 (2008)","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"1","key":"15_CR29","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/s10458-006-0008-9","volume":"15","author":"C.V. Goldman","year":"2007","unstructured":"Goldman, C.V., Allen, M., Zilberstein, S.: Learning to communicate in a decentralized environment. Autonomous Agents and Multi-Agent Systems\u00a015(1), 47\u201390 (2007)","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"15_CR30","unstructured":"Guestrin, C., Lagoudakis, M., Parr, R.: Coordinated reinforcement learning. In: Proc. of the International Conference on Machine Learning, pp. 227\u2013234 (2002)"},{"key":"15_CR31","unstructured":"Hansen, E.A., Bernstein, D.S., Zilberstein, S.: Dynamic programming for partially observable stochastic games. In: Proc. of the National Conference on Artificial Intelligence, pp. 709\u2013715 (2004)"},{"key":"15_CR32","unstructured":"Kaisers, M., Tuyls, K.: Frequency adjusted multi-agent Q-learning. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 309\u2013316 (2010)"},{"key":"15_CR33","unstructured":"Kim, Y., Nair, R., Varakantham, P., Tambe, M., Yokoo, M.: Exploiting locality of interaction in networked distributed POMDPs. In: Proc. of the AAAI Spring Symposium on Distributed Plan and Schedule Management (2006)"},{"key":"15_CR34","first-page":"1789","volume":"7","author":"J.R. Kok","year":"2006","unstructured":"Kok, J.R., Vlassis, N.: Collaborative multiagent reinforcement learning by payoff propagation. Journal of Machine Learning Research\u00a07, 1789\u20131828 (2006)","journal-title":"Journal of Machine Learning Research"},{"issue":"1-2","key":"15_CR35","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1016\/S0004-3702(97)00023-4","volume":"94","author":"D. Koller","year":"1997","unstructured":"Koller, D., Pfeffer, A.: Representations and solutions for game-theoretic problems. Artificial Intelligence\u00a094(1-2), 167\u2013215 (1997)","journal-title":"Artificial Intelligence"},{"key":"15_CR36","doi-asserted-by":"crossref","unstructured":"Koller, D., Megiddo, N., von Stengel, B.: Fast algorithms for finding randomized strategies in game trees. In: Proc. of the 26th ACM Symposium on Theory of Computing, pp. 750\u2013759 (1994)","DOI":"10.1145\/195058.195451"},{"key":"15_CR37","unstructured":"Kumar, A., Zilberstein, S.: Constraint-based dynamic programming for decentralized POMDPs with structured interactions. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 561\u2013568 (2009)"},{"key":"15_CR38","unstructured":"Kumar, A., Zilberstein, S.: Anytime planning for decentralized POMDPs using expectation maximization. In: Proc. of Uncertainty in Artificial Intelligence (2010a)"},{"key":"15_CR39","unstructured":"Kumar, A., Zilberstein, S.: Point-based backup for decentralized POMDPs: Complexity and new algorithms. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 1315\u20131322 (2010b)"},{"key":"15_CR40","unstructured":"Madani, O., Hanks, S., Condon, A.: On the undecidability of probabilistic planning and infinite-horizon partially observable Markov decision problems. In: Proc. of the National Conference on Artificial Intelligence, pp. 541\u2013548 (1999)"},{"key":"15_CR41","unstructured":"Marecki, J., Gupta, T., Varakantham, P., Tambe, M., Yokoo, M.: Not all agents are equal: scaling up distributed POMDPs for agent networks. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 485\u2013492 (2008)"},{"key":"15_CR42","doi-asserted-by":"crossref","unstructured":"Mostafa, H., Lesser, V.: Offline planning for communication by exploiting structured interactions in decentralized MDPs. In: Proc. of 2009 IEEE\/WIC\/ACM International Conference on Web Intelligence and Intelligent Agent Technology, pp. 193\u2013200 (2009)","DOI":"10.1109\/WI-IAT.2009.150"},{"key":"15_CR43","doi-asserted-by":"crossref","unstructured":"Nair, R., Tambe, M., Marsella, S.: Role allocation and reallocation in multiagent teams: towards a practical analysis. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 552\u2013559 (2003a)","DOI":"10.1145\/860575.860664"},{"key":"15_CR44","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1007\/978-3-540-45135-8_12","volume-title":"RoboCup 2002: Robot Soccer World Cup VI","author":"R. Nair","year":"2003","unstructured":"Nair, R., Tambe, M., Marsella, S.C.: Team Formation for Reformation in Multiagent Domains Like RoboCupRescue. In: Kaminka, G.A., Lima, P.U., Rojas, R. (eds.) RoboCup 2002: Robot Soccer World Cup VI, LNCS (LNAI), vol.\u00a02752, pp. 150\u2013161. Springer, Heidelberg (2003)"},{"key":"15_CR45","unstructured":"Nair, R., Tambe, M., Yokoo, M., Pynadath, D.V., Marsella, S.: Taming decentralized POMDPs: Towards efficient policy computation for multiagent settings. In: Proc. of the International Joint Conference on Artificial Intelligence, pp. 705\u2013711 (2003c)"},{"key":"15_CR46","unstructured":"Nair, R., Roth, M., Yohoo, M.: Communication for improving policy computation in distributed POMDPs. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 1098\u20131105 (2004)"},{"key":"15_CR47","unstructured":"Nair, R., Varakantham, P., Tambe, M., Yokoo, M.: Networked distributed POMDPs: A synthesis of distributed constraint optimization and POMDPs. In: Proc. of the National Conference on Artificial Intelligence, pp. 133\u2013139 (2005)"},{"key":"15_CR48","doi-asserted-by":"crossref","unstructured":"Oliehoek, F.A.: Value-based planning for teams of agents in stochastic partially observable environments. PhD thesis, Informatics Institute, University of Amsterdam (2010)","DOI":"10.5117\/9789056296100"},{"key":"15_CR49","doi-asserted-by":"crossref","unstructured":"Oliehoek, F.A., Vlassis, N.: Q-value functions for decentralized POMDPs. In: Proc. of The International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 833\u2013840 (2007)","DOI":"10.1145\/1329125.1329390"},{"key":"15_CR50","unstructured":"Oliehoek, F.A., Spaan, M.T.J., Vlassis, N.: Dec-POMDPs with delayed communication. In: AAMAS Workshop on Multi-agent Sequential Decision Making in Uncertain Domains (2007)"},{"key":"15_CR51","first-page":"341","volume":"32","author":"F.A. Oliehoek","year":"2008","unstructured":"Oliehoek, F.A., Kooi, J.F., Vlassis, N.: The cross-entropy method for policy search in decentralized POMDPs. Informatica\u00a032, 341\u2013357 (2008a)","journal-title":"Informatica"},{"key":"15_CR52","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1613\/jair.2447","volume":"32","author":"F.A. Oliehoek","year":"2008","unstructured":"Oliehoek, F.A., Spaan, M.T.J., Vlassis, N.: Optimal and approximate Q-value functions for decentralized POMDPs. Journal of Artificial Intelligence Research\u00a032, 289\u2013353 (2008b)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"15_CR53","unstructured":"Oliehoek, F.A., Spaan, M.T.J., Whiteson, S., Vlassis, N.: Exploiting locality of interaction in factored Dec-POMDPs. In: Proc. of The International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 517\u2013524 (2008)"},{"key":"15_CR54","unstructured":"Oliehoek, F.A., Whiteson, S., Spaan, M.T.J.: Lossless clustering of histories in decentralized POMDPs. In: Proc. of The International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 577\u2013584 (2009)"},{"key":"15_CR55","unstructured":"Oliehoek, F.A., Spaan, M.T.J., Dibangoye, J., Amato, C.: Heuristic search for identical payoff Bayesian games. In: Proc. of The International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 1115\u20131122 (2010)"},{"key":"15_CR56","doi-asserted-by":"crossref","unstructured":"Ooi, J.M., Wornell, G.W.: Decentralized control of a multiple access broadcast channel: Performance bounds. In: Proc. of the 35th Conference on Decision and Control, pp. 293\u2013298 (1996)","DOI":"10.1109\/CDC.1996.574318"},{"key":"15_CR57","unstructured":"Osborne, M.J., Rubinstein, A.: A Course in Game Theory. The MIT Press (1994)"},{"key":"15_CR58","unstructured":"Pajarinen, J., Peltonen, J.: Efficient planning for factored infinite-horizon DEC-POMDPs. In: Proc. of the International Joint Conference on Artificial Intelligence (to appear, 2011)"},{"key":"15_CR59","doi-asserted-by":"crossref","unstructured":"Paquet, S., Tobin, L., Chaib-draa, B.: An online POMDP algorithm for complex multiagent environments. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems (2005)","DOI":"10.1145\/1082473.1082620"},{"key":"15_CR60","unstructured":"Peshkin, L.: Reinforcement learning by policy search. PhD thesis, Brown University (2001)"},{"key":"15_CR61","unstructured":"Peshkin, L., Kim, K.E., Meuleau, N., Kaelbling, L.P.: Learning to cooperate via policy search. In: Proc. of Uncertainty in Artificial Intelligence, pp. 307\u2013314 (2000)"},{"key":"15_CR62","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1613\/jair.1024","volume":"16","author":"D.V. Pynadath","year":"2002","unstructured":"Pynadath, D.V., Tambe, M.: The communicative multiagent team decision problem: Analyzing teamwork theories and models. Journal of Artificial Intelligence Research\u00a016, 389\u2013423 (2002)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"15_CR63","doi-asserted-by":"crossref","unstructured":"Rabinovich, Z., Goldman, C.V., Rosenschein, J.S.: The complexity of multiagent systems: the price of silence. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 1102\u20131103 (2003)","DOI":"10.1145\/860575.860816"},{"key":"15_CR64","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/1-4020-3389-3_8","volume-title":"Multi-Robot Systems. From Swarms to Intelligent Automata","author":"M. Roth","year":"2005","unstructured":"Roth, M., Simmons, R., Veloso, M.: Decentralized communication strategies for coordinated multi-agent policies. In: Parker, L.E., Schneider, F.E., Shultz, A.C. (eds.) Multi-Robot Systems. From Swarms to Intelligent Automata, vol.\u00a0III, pp. 93\u2013106. Springer, Heidelberg (2005a)"},{"key":"15_CR65","doi-asserted-by":"crossref","unstructured":"Roth, M., Simmons, R., Veloso, M.: Reasoning about joint beliefs for execution-time communication decisions. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 786\u2013793 (2005b)","DOI":"10.1145\/1082473.1082593"},{"key":"15_CR66","doi-asserted-by":"crossref","unstructured":"Roth, M., Simmons, R., Veloso, M.: Exploiting factored representations for decentralized execution in multi-agent teams. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 467\u2013463 (2007)","DOI":"10.1145\/1329125.1329213"},{"key":"15_CR67","unstructured":"Russell, S., Norvig, P.: Artificial Intelligence: A Modern Approach, 2nd edn. Pearson Education (2003)"},{"key":"15_CR68","unstructured":"Seuken, S., Zilberstein, S.: Improved memory-bounded dynamic programming for decentralized POMDPs. In: Proc. of Uncertainty in Artificial Intelligence (2007a)"},{"key":"15_CR69","unstructured":"Seuken, S., Zilberstein, S.: Memory-bounded dynamic programming for DEC-POMDPs. In: Proc. of the International Joint Conference on Artificial Intelligence, pp. 2009\u20132015 (2007b)"},{"issue":"2","key":"15_CR70","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1007\/s10458-007-9026-5","volume":"17","author":"S. Seuken","year":"2008","unstructured":"Seuken, S., Zilberstein, S.: Formal models and algorithms for decentralized decision making under uncertainty. Autonomous Agents and Multi-Agent Systems\u00a017(2), 190\u2013250 (2008)","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"15_CR71","doi-asserted-by":"crossref","unstructured":"Singh, S.P., Jaakkola, T., Jordan, M.I.: Learning without state-estimation in partially observable Markovian decision processes. In: Proc. of the International Conference on Machine Learning, pp. 284\u2013292. Morgan Kaufmann (1994)","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"key":"15_CR72","doi-asserted-by":"crossref","unstructured":"Spaan, M.T.J., Gordon, G.J., Vlassis, N.: Decentralized planning under uncertainty for teams of communicating agents. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 249\u2013256 (2006)","DOI":"10.1145\/1160633.1160678"},{"key":"15_CR73","unstructured":"Spaan, M.T.J., Oliehoek, F.A., Amato, C.: Scaling up optimal heuristic search in Dec-POMDPs via incremental expansion. In: Proc. of the International Joint Conference on Artificial Intelligence (to appear, 2011)"},{"key":"15_CR74","unstructured":"Szer, D., Charpillet, F.: Point-based dynamic programming for DEC-POMDPs. In: Proc. of the National Conference on Artificial Intelligence (2006)"},{"key":"15_CR75","unstructured":"Szer, D., Charpillet, F., Zilberstein, S.: MAA*: A heuristic search algorithm for solving decentralized POMDPs. In: Proc. of Uncertainty in Artificial Intelligence, pp. 576\u2013583 (2005)"},{"issue":"1","key":"15_CR76","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/s10458-005-3783-9","volume":"12","author":"K. Tuyls","year":"2006","unstructured":"Tuyls, K., Hoen, P.J., Vanschoenwinkel, B.: An evolutionary dynamical analysis of multi-agent learning in iterated games. Autonomous Agents and Multi-Agent Systems\u00a012(1), 115\u2013153 (2006)","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"15_CR77","doi-asserted-by":"crossref","unstructured":"Varakantham, P., Marecki, J., Yabu, Y., Tambe, M., Yokoo, M.: Letting loose a SPIDER on a network of POMDPs: Generating quality guaranteed policies. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems (2007)","DOI":"10.1145\/1329125.1329388"},{"key":"15_CR78","doi-asserted-by":"crossref","unstructured":"Varakantham, P., Young Kwak, J., Taylor, M.E., Marecki, J., Scerri, P., Tambe, M.: Exploiting coordination locales in distributed POMDPs via social model shaping. In: Proc. of the International Conference on Automated Planning and Scheduling (2009)","DOI":"10.1609\/icaps.v19i1.13369"},{"issue":"3-4","key":"15_CR79","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1177\/0278364907084983","volume":"27","author":"P. Varshavskaya","year":"2008","unstructured":"Varshavskaya, P., Kaelbling, L.P., Rus, D.: Automated design of adaptive controllers for modular robots using reinforcement learning. International Journal of Robotics Research\u00a027(3-4), 505\u2013526 (2008)","journal-title":"International Journal of Robotics Research"},{"key":"15_CR80","doi-asserted-by":"crossref","unstructured":"Vlassis, N.: A Concise Introduction to Multiagent Systems and Distributed Artificial Intelligence. In: Synthesis Lectures on Artificial Intelligence and Machine Learning. Morgan & Claypool Publishers (2007)","DOI":"10.1007\/978-3-031-01543-4"},{"key":"15_CR81","unstructured":"Witwicki, S.J.: Abstracting influences for efficient multiagent coordination under uncertainty. PhD thesis, University of Michigan, Ann Arbor, Michigan, USA (2011)"},{"key":"15_CR82","doi-asserted-by":"crossref","unstructured":"Witwicki, S.J., Durfee, E.H.: Influence-based policy abstraction for weakly-coupled Dec-POMDPs. In: Proc. of the International Conference on Automated Planning and Scheduling, pp. 185\u2013192 (2010)","DOI":"10.1609\/icaps.v20i1.13419"},{"key":"15_CR83","unstructured":"Wu, F., Zilberstein, S., Chen, X.: Point-based policy generation for decentralized POMDPs. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 1307\u20131314 (2010a)"},{"key":"15_CR84","unstructured":"Wu, F., Zilberstein, S., Chen, X.: Rollout sampling policy iteration for decentralized POMDPs. In: Proc. of Uncertainty in Artificial Intelligence (2010b)"},{"key":"15_CR85","doi-asserted-by":"crossref","unstructured":"Wu, F., Zilberstein, S., Chen, X.: Trial-based dynamic programming for multi-agent planning. In: Proc. of the National Conference on Artificial Intelligence, pp. 908\u2013914 (2010c)","DOI":"10.1609\/aaai.v24i1.7616"},{"issue":"2","key":"15_CR86","doi-asserted-by":"publisher","first-page":"487","DOI":"10.1016\/j.artint.2010.09.008","volume":"175","author":"F. Wu","year":"2011","unstructured":"Wu, F., Zilberstein, S., Chen, X.: Online planning for multi-agent systems with bounded communication. Artificial Intelligence\u00a0175(2), 487\u2013511 (2011)","journal-title":"Artificial Intelligence"},{"key":"15_CR87","doi-asserted-by":"crossref","unstructured":"Wu, J., Durfee, E.H.: Mixed-integer linear programming for transition-independent decentralized MDPs. In: Proc. of the International Joint Conference on Autonomous Agents and Multi Agent Systems, pp. 1058\u20131060 (2006)","DOI":"10.1145\/1160633.1160822"},{"key":"15_CR88","unstructured":"Wunder, M., Littman, M.L., Babes, M.: Classes of multiagent Q-learning dynamics with epsilon-greedy exploration. In: Proc. of the International Conference on Machine Learning, pp. 1167\u20131174 (2010)"},{"key":"15_CR89","doi-asserted-by":"crossref","unstructured":"Xuan, P., Lesser, V., Zilberstein, S.: Communication decisions in multi-agent cooperation: Model and experiments. In: Proc. of the International Conference on Autonomous Agents (2001)","DOI":"10.1145\/375735.376469"},{"key":"15_CR90","unstructured":"Zettlemoyer, L.S., Milch, B., Kaelbling, L.P.: Multi-agent filtering with infinitely nested beliefs. In: Advances in Neural Information Processing Systems, vol.\u00a021 (2009)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T13:02:21Z","timestamp":1742648541000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":90,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_15","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}