{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T21:56:47Z","timestamp":1765231007318},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2014,4,13]],"date-time":"2014-04-13T00:00:00Z","timestamp":1397347200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2015,5]]},"DOI":"10.1007\/s10458-014-9261-5","type":"journal-article","created":{"date-parts":[[2014,4,12]],"date-time":"2014-04-12T12:17:52Z","timestamp":1397305072000},"page":"455-494","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Scalable solutions of interactive POMDPs using generalized and bounded policy iteration"],"prefix":"10.1007","volume":"29","author":[{"given":"Ekhlas","family":"Sonu","sequence":"first","affiliation":[]},{"given":"Prashant","family":"Doshi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,4,13]]},"reference":[{"issue":"3","key":"9261_CR1","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1007\/s10458-009-9103-z","volume":"21","author":"C Amato","year":"2010","unstructured":"Amato, C., Bernstein, D. S., & Zilberstein, S. (2010). Optimizing fixed-size stochastic controllers for POMDPs and decentralized POMDPs. Journal of Autonomous Agents and Multi-Agent Systems, 21(3), 293\u2013320.","journal-title":"Journal of Autonomous Agents and Multi-Agent Systems"},{"key":"9261_CR2","doi-asserted-by":"crossref","unstructured":"Amato, C., Bonet, B. & Zilberstein, S. (2010). Finite-state controllers based on Mealy machines for centralized and decentralized POMDPs. In Twenty Fourth AAAI Conference on Artificial Intelligence (AAAI), pp. 1052\u20131058.","DOI":"10.1609\/aaai.v24i1.7748"},{"key":"9261_CR3","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1007\/s001820050112","volume":"28","author":"RJ Aumann","year":"1999","unstructured":"Aumann, R. J. (1999). Interactive epistemology II: Probability. International Journal of Game Theory, 28, 301\u2013314.","journal-title":"International Journal of Game Theory"},{"key":"9261_CR4","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1613\/jair.2667","volume":"34","author":"DS Bernstein","year":"2009","unstructured":"Bernstein, D. S., Amato, C., Hansen, E., & Zilberstein, S. (2009). Policy iteration for decentralized control of Markov decision processes. Journal of Artificial Intelligence Research, 34, 89\u2013132.","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"4","key":"9261_CR5","doi-asserted-by":"crossref","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"DS Bernstein","year":"2002","unstructured":"Bernstein, D. S., Givan, R., Immerman, N., & Zilberstein, S. (2002). The complexity of decentralized control of Markov decision processes. Mathematics of Operations Research, 27(4), 819\u2013840.","journal-title":"Mathematics of Operations Research"},{"key":"9261_CR6","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1006\/jeth.1993.1012","volume":"59","author":"A Brandenburger","year":"1993","unstructured":"Brandenburger, A., & Dekel, E. (1993). Hierarchies of beliefs and common knowledge. Journal of Economic Theory, 59, 189\u2013198.","journal-title":"Journal of Economic Theory"},{"key":"9261_CR7","unstructured":"Cassandra, A. R., Littman, M. L., & Zhang, N. L. (1997). Incremental pruning: A simple, fast, exact method for partially observable Markov decision processes. In Proceedings of the Thirteenth Conference on Uncertainty in Artificial Intelligence. San Francisco: Morgan Kaufmann Publishers."},{"key":"9261_CR8","volume-title":"Intentional systems. Brainstorms","author":"D Dennett","year":"1986","unstructured":"Dennett, D. (1986). Intentional systems. Brainstorms. Cambridge, MA: MIT Press."},{"key":"9261_CR9","unstructured":"Dongarra, J., Lumsdaine, A., Pozo, R. & Remington, K. (1992). A sparse matrix library in c++ for high performance architectures. In Second Object Oriented Numerics Conference, pp. 214\u2013218."},{"issue":"4","key":"9261_CR10","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1609\/aimag.v33i4.2402","volume":"33","author":"P Doshi","year":"2012","unstructured":"Doshi, P. (2012). Decision making in complex multiagent settings: A tale of two frameworks. AI Magazine, 33(4), 82\u201395.","journal-title":"AI Magazine"},{"key":"9261_CR11","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1613\/jair.2630","volume":"34","author":"P Doshi","year":"2009","unstructured":"Doshi, P., & Gmytrasiewicz, P. J. (2009). Monte Carlo sampling methods for approximating interactive POMDPs. Journal of Artificial Intelligence Research, 34, 297\u2013337.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9261_CR12","unstructured":"Doshi, P. & Perez, D. (2008). Generalized point based value iteration for interactive POMDPs. In Twenty third conference on artificial intelligence (AAAI) (pp. 63\u201368)."},{"key":"9261_CR13","unstructured":"Doshi, P., Qu, X., Goodie, A. & Young, D. (2010). Modeling recursive reasoning in humans using empirically informed interactive POMDPs. In International autonomous agents and multiagent systems conference (AAMAS) (pp. 1223\u20131230)."},{"key":"9261_CR14","volume-title":"The theory of learning in games","author":"D Fudenberg","year":"1998","unstructured":"Fudenberg, D., & Levine, D. K. (1998). The theory of learning in games. Cambridge, MA: MIT Press."},{"key":"9261_CR15","unstructured":"Giudice, A. D. & Gmytrasiewicz, P. (2007). Towards strategic Kriegspiel play with opponent modeling. In Game theoretic and decision-theoretic agents, AAAI spring symposium (pp. 17\u201322)."},{"key":"9261_CR16","unstructured":"Giudice, A. D. & Gmytrasiewicz, P. (2009). Towards strategic Kriegspiel play with opponent modeling (extended abstract). In Autonomous agents and multiagent systems conference (AAMAS) (pp. 1265\u20131266)"},{"key":"9261_CR17","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1613\/jair.1579","volume":"24","author":"PJ Gmytrasiewicz","year":"2005","unstructured":"Gmytrasiewicz, P. J., & Doshi, P. (2005). A framework for sequential planning in multiagent settings. Journal of Artificial Intelligence Research, 24, 49\u201379.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9261_CR18","unstructured":"Guo, Q., & Gmytrasiewicz, P. J. (2011). Modeling bounded rationality of agents during interactions (extended abstract). In International joint conference on autonomous agents and multi agent systems (AAMAS) (pp. 1285\u20131286)."},{"key":"9261_CR19","unstructured":"Hansen, E. (1998). Solving POMDPs by searching in policy space. In Uncertainty in artificial intelligence (UAI) (pp. 211\u2013219)."},{"key":"9261_CR20","unstructured":"Hansen, E. (2008). Sparse stochastic finite-state controllers for POMDPs. In Proceedings of the 24th Conference on Uncertainty in Artificial Intelligence (UAI) (pp. 256\u2013263). AUAI Press."},{"issue":"3","key":"9261_CR21","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1287\/mnsc.14.3.159","volume":"14","author":"JC Harsanyi","year":"1967","unstructured":"Harsanyi, J. C. (1967). Games with incomplete information played by Bayesian players. Management Science, 14(3), 159\u2013182.","journal-title":"Management Science"},{"key":"9261_CR22","unstructured":"Hoey, J., & Poupart, P. (2005). Solving POMDPs with continuous or large discrete observation spaces. In International joint conference on AI (IJCAI) (pp. 1332\u20131338)."},{"key":"9261_CR23","volume-title":"Introduction to automata theory, languages, and computation","author":"J Hopcroft","year":"1979","unstructured":"Hopcroft, J., & Ullman, J. (1979). Introduction to automata theory, languages, and computation. Reading, MA: Addison-wesley."},{"key":"9261_CR24","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L Kaelbling","year":"1998","unstructured":"Kaelbling, L., Littman, M., & Cassandra, A. (1998). Planning and acting in partially observable stochastic domains. Artificial Intelligence, 101, 99\u2013134.","journal-title":"Artificial Intelligence"},{"key":"9261_CR25","first-page":"159","volume-title":"Inequalities","author":"V Klee","year":"1972","unstructured":"Klee, V., & Minty, G. J. (1972). How good is the simplex algorithm? In O. Shisha (Ed.), Inequalities (Vol. III, pp. 159\u2013175). New York: Academic Press."},{"key":"9261_CR26","unstructured":"Kurniawati, H., Hsu, D., & Lee, W. S. (2008). SARSOP: Efficient point-based POMDP planning by approximating optimally reachable belief spaces. In Robotics: Science and Systems (pp. 65\u201372)."},{"key":"9261_CR27","unstructured":"Meissner, C. (2011). A complex game of cat and mouse. Lawrence Livermore National Laboratory Science and Technology Review (pp. 18\u201321)."},{"key":"9261_CR28","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/BF01770224","volume":"14","author":"J Mertens","year":"1985","unstructured":"Mertens, J., & Zamir, S. (1985). Formulation of Bayesian analysis for games with incomplete information. International Journal of Game Theory, 14, 1\u201329.","journal-title":"International Journal of Game Theory"},{"key":"9261_CR29","unstructured":"Nair, R., Tambe, M., Yokoo, M., Pynadath, D., & Marsella, S. (2003). Taming decentralized POMDPs: Towards efficient policy computation for multiagent settings. In: International joint conference on artificial intelligence (IJCAI) (pp. 705\u2013711)."},{"key":"9261_CR30","doi-asserted-by":"crossref","unstructured":"Ng, B., Meyers, C., Boakye, K., & Nitao, J. (2010). Towards applying interactive POMDPs to real-world adversary modeling. In Innovative applications in artificial intelligence (IAAI) (pp. 1814\u20131820).","DOI":"10.1609\/aaai.v24i2.18818"},{"issue":"3","key":"9261_CR31","doi-asserted-by":"crossref","first-page":"441","DOI":"10.1287\/moor.12.3.441","volume":"12","author":"CH Papadimitriou","year":"1987","unstructured":"Papadimitriou, C. H., & Tsitsiklis, J. N. (1987). The complexity of Markov decision processes. Mathematics of Operations Research, 12(3), 441\u2013450.","journal-title":"Mathematics of Operations Research"},{"key":"9261_CR32","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1613\/jair.2078","volume":"27","author":"J Pineau","year":"2006","unstructured":"Pineau, J., Gordon, G., & Thrun, S. (2006). Anytime point-based value iteration for large POMDPs. Journal of Artificial Intelligence Research, 27, 335\u2013380.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9261_CR33","unstructured":"Poupart, P., & Boutilier, C. (2003). Bounded finite state controllers. In Neural Information Processing Systems (NIPS) (pp. 823\u2013830)."},{"key":"9261_CR34","unstructured":"Poupart, P., & Boutilier, C. (2004). VDCBPI: An approximate algorithm scalable for large-scale POMDPs. In Neural Information Processing Systems (NIPS) (pp. 1081\u20131088)."},{"key":"9261_CR35","doi-asserted-by":"crossref","unstructured":"Rathnasabapathy, B., Doshi, P., & Gmytrasiewicz, P.J. (2006). Exact solutions to interactive POMDPs using behavioral equivalence. In Autonomous agents and multi-agent systems conference (AAMAS) (pp. 1025\u20131032).","DOI":"10.1145\/1160633.1160816"},{"issue":"2","key":"9261_CR36","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1007\/s10458-007-9026-5","volume":"17","author":"S Seuken","year":"2008","unstructured":"Seuken, S., & Zilberstein, S. (2008). Formal models and algorithms for decentralized decision making under uncertainty. Journal of Autonomous Agents and Multiagent Systems, 17(2), 190\u2013250.","journal-title":"Journal of Autonomous Agents and Multiagent Systems"},{"key":"9261_CR37","unstructured":"Seymour, R., & Peterson, G. L. (2009). Responding to sneaky agents in multi-agent domains. In Florida artificial intelligence research society conference (FLAIRS) (pp. 99\u2013104)."},{"key":"9261_CR38","doi-asserted-by":"crossref","unstructured":"Seymour, R., & Peterson, G. L. (2009). A trust-based multiagent system. In IEEE international conference on computational science and engineering (pp. 109\u2013116).","DOI":"10.1109\/CSE.2009.297"},{"key":"9261_CR39","doi-asserted-by":"crossref","first-page":"1071","DOI":"10.1287\/opre.21.5.1071","volume":"21","author":"R Smallwood","year":"1973","unstructured":"Smallwood, R., & Sondik, E. (1973). The optimal control of partially observable Markov decision processes over a finite horizon. Operations Research, 21, 1071\u20131088.","journal-title":"Operations Research"},{"key":"9261_CR40","unstructured":"Smith, T., & Simmons, R. (2004). Heuristic search value iteration for POMDPs. In Uncertainty in artificial intelligence (UAI) (pp. 520\u2013527)."},{"issue":"2","key":"9261_CR41","doi-asserted-by":"crossref","first-page":"282","DOI":"10.1287\/opre.26.2.282","volume":"26","author":"EJ Sondik","year":"1978","unstructured":"Sondik, E. J. (1978). The optimal control of partially observable Markov processes over the infinite horizon: Discounted cost. Operations Research, 26(2), 282\u2013304.","journal-title":"Operations Research"},{"issue":"3","key":"9261_CR42","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1145\/990308.990310","volume":"51","author":"DA Spielman","year":"2004","unstructured":"Spielman, D. A., & Teng, S. H. (2004). Smoothed analysis of algorithms: Why the simplex algorithm usually takes polynomial time. Journal of the ACM, 51(3), 385\u2013463.","journal-title":"Journal of the ACM"},{"key":"9261_CR43","doi-asserted-by":"crossref","unstructured":"Wang, F. (2013). An I-POMDP based multi-agent architecture for dialogue tutoring. In: International conference on advanced information and communication technology for education (ICAICTE) (pp. 486\u2013489).","DOI":"10.2991\/icaicte.2013.96"},{"key":"9261_CR44","unstructured":"Woodward, M. P., & Wood, R. J. (2012). Learning from humans as an I-POMDP. CoRR abs\/1204.0274."},{"key":"9261_CR45","unstructured":"Wunder, M., Kaisers, M., Yaros, J., & Littman, M. (2011). Using iterated reasoning to predict opponent strategies. In International conference on autonomous agents and multi-agent systems (AAMAS) (pp. 593\u2013600)."},{"issue":"2","key":"9261_CR46","doi-asserted-by":"crossref","first-page":"150","DOI":"10.1016\/j.csl.2009.04.001","volume":"24","author":"S Young","year":"2010","unstructured":"Young, S., Gai, M., Keizer, S., Mairesse, F., Schatzmann, J., Thomson, B., et al. (2010). The hidden information state model: A practical framework for pomdp-based spoken dialogue management. Computer Speech and Language, 24(2), 150\u2013174.","journal-title":"Computer Speech and Language"},{"key":"9261_CR47","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1613\/jair.3461","volume":"43","author":"Y Zeng","year":"2012","unstructured":"Zeng, Y., & Doshi, P. (2012). Exploiting model equivalences for solving interactive dynamic influence diagrams. Journal of Artificial Intelligence Research, 43, 211\u2013255.","journal-title":"Journal of Artificial Intelligence Research"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-014-9261-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10458-014-9261-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-014-9261-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,12]],"date-time":"2023-07-12T02:28:17Z","timestamp":1689128897000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10458-014-9261-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,4,13]]},"references-count":47,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2015,5]]}},"alternative-id":["9261"],"URL":"https:\/\/doi.org\/10.1007\/s10458-014-9261-5","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,4,13]]}}}