{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T23:19:21Z","timestamp":1776122361683,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":114,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_12","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"387-414","source":"Crossref","is-referenced-by-count":197,"title":["Partially Observable Markov Decision Processes"],"prefix":"10.1007","author":[{"given":"Matthijs T. J.","family":"Spaan","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"12_CR1","unstructured":"Aberdeen, D., Baxter, J.: Scaling internal-state policy-gradient methods for POMDPs. In: International Conference on Machine Learning (2002)"},{"issue":"1","key":"12_CR2","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1016\/0022-247X(65)90154-X","volume":"10","author":"K.J. \u00c5str\u00f6m","year":"1965","unstructured":"\u00c5str\u00f6m, K.J.: Optimal control of Markov processes with incomplete state information. Journal of Mathematical Analysis and Applications\u00a010(1), 174\u2013205 (1965)","journal-title":"Journal of Mathematical Analysis and Applications"},{"key":"12_CR3","unstructured":"Bagnell, J.A., Kakade, S., Ng, A.Y., Schneider, J.: Policy search by dynamic programming. In: Advances in Neural Information Processing Systems, vol.\u00a016. MIT Press (2004)"},{"key":"12_CR4","unstructured":"Baird, L., Moore, A.: Gradient descent for general reinforcement learning. In: Advances in Neural Information Processing Systems, vol.\u00a011. MIT Press (1999)"},{"key":"12_CR5","unstructured":"Bakker, B.: Reinforcement learning with long short-term memory. In: Advances in Neural Information Processing Systems, vol.\u00a014. MIT Press (2002)"},{"key":"12_CR6","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P.L.: Infinite-horizon policy-gradient estimation. Journal of Artificial Intelligence Research\u00a015, 319\u2013350 (2001)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"12_CR7","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1613\/jair.807","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P.L., Weaver, L.: Experiments with infinite-horizon, policy-gradient estimation. Journal of Artificial Intelligence Research\u00a015, 351\u2013381 (2001)","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"4","key":"12_CR8","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"D.S. Bernstein","year":"2002","unstructured":"Bernstein, D.S., Givan, R., Immerman, N., Zilberstein, S.: The complexity of decentralized control of Markov decision processes. Mathematics of Operations Research\u00a027(4), 819\u2013840 (2002)","journal-title":"Mathematics of Operations Research"},{"key":"12_CR9","unstructured":"Bonet, B.: An epsilon-optimal grid-based algorithm for partially observable Markov decision processes. In: International Conference on Machine Learning (2002)"},{"key":"12_CR10","unstructured":"Boutilier, C., Poole, D.: Computing optimal policies for partially observable decision processes using compact representations. In: Proc. of the National Conference on Artificial Intelligence (1996)"},{"key":"12_CR11","unstructured":"Brafman, R.I.: A heuristic variable grid solution method for POMDPs. In: Proc. of the National Conference on Artificial Intelligence (1997)"},{"key":"12_CR12","unstructured":"Braziunas, D., Boutilier, C.: Stochastic local search for POMDP controllers. In: Proc. of the National Conference on Artificial Intelligence (2004)"},{"key":"12_CR13","unstructured":"Brunskill, E., Kaelbling, L., Lozano-Perez, T., Roy, N.: Continuous-state POMDPs with hybrid dynamics. In: Proc. of the Int. Symposium on Artificial Intelligence and Mathematics (2008)"},{"key":"12_CR14","unstructured":"Cassandra, A.R.: Exact and approximate algorithms for partially observable Markov decision processes. PhD thesis, Brown University (1998)"},{"key":"12_CR15","unstructured":"Cassandra, A.R., Kaelbling, L.P., Littman, M.L.: Acting optimally in partially observable stochastic domains. In: Proc. of the National Conference on Artificial Intelligence (1994)"},{"key":"12_CR16","unstructured":"Cassandra, A.R., Kaelbling, L.P., Kurien, J.A.: Acting under uncertainty: Discrete Bayesian models for mobile robot navigation. In: Proc. of International Conference on Intelligent Robots and Systems (1996)"},{"key":"12_CR17","unstructured":"Cassandra, A.R., Littman, M.L., Zhang, N.L.: Incremental pruning: A simple, fast, exact method for partially observable Markov decision processes. In: Proc. of Uncertainty in Artificial Intelligence (1997)"},{"key":"12_CR18","unstructured":"Cheng, H.T.: Algorithms for partially observable Markov decision processes. PhD thesis, University of British Columbia (1988)"},{"key":"12_CR19","unstructured":"Doshi, F., Roy, N.: The permutable POMDP: fast solutions to POMDPs for preference elicitation. In: Proc. of Int. Conference on Autonomous Agents and Multi Agent Systems (2008)"},{"key":"12_CR20","unstructured":"Drake, A.W.: Observation of a Markov process through a noisy channel. Sc.D. thesis, Massachusetts Institute of Technology (1962)"},{"key":"12_CR21","unstructured":"Duff, M.: Optimal learning: Computational procedures for Bayes-adaptive Markov decision processes. PhD thesis, University of Massachusetts, Amherst (2002)"},{"issue":"1","key":"12_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1137\/1110001","volume":"10","author":"E.B. Dynkin","year":"1965","unstructured":"Dynkin, E.B.: Controlled random sequences. Theory of Probability and its Applications\u00a010(1), 1\u201314 (1965)","journal-title":"Theory of Probability and its Applications"},{"issue":"2","key":"12_CR23","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1061\/(ASCE)1076-0342(1995)1:2(92)","volume":"1","author":"J.H. Ellis","year":"1995","unstructured":"Ellis, J.H., Jiang, M., Corotis, R.: Inspection, maintenance, and repair with partial observability. Journal of Infrastructure Systems\u00a01(2), 92\u201399 (1995)","journal-title":"Journal of Infrastructure Systems"},{"key":"12_CR24","unstructured":"Feng, Z., Zilberstein, S.: Region-based incremental pruning for POMDPs. In: Proc. of Uncertainty in Artificial Intelligence (2004)"},{"issue":"7","key":"12_CR25","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1016\/j.robot.2007.01.004","volume":"55","author":"A. Foka","year":"2007","unstructured":"Foka, A., Trahanias, P.: Real-time hierarchical POMDPs for autonomous robot navigation. Robotics and Autonomous Systems\u00a055(7), 561\u2013571 (2007)","journal-title":"Robotics and Autonomous Systems"},{"key":"12_CR26","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1613\/jair.616","volume":"11","author":"D. Fox","year":"1999","unstructured":"Fox, D., Burgard, W., Thrun, S.: Markov localization for mobile robots in dynamic environments. Journal of Artificial Intelligence Research\u00a011, 391\u2013427 (1999)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Haight, R.G., Polasky, S.: Optimal control of an invasive species with imperfect information about the level of infestation. Resource and Energy Economics (2010) (in Press, Corrected Proof)","DOI":"10.1016\/j.reseneeco.2010.04.005"},{"key":"12_CR28","unstructured":"Hansen, E.A.: Finite-memory control of partially observable systems. PhD thesis, University of Massachusetts, Amherst (1998a)"},{"key":"12_CR29","unstructured":"Hansen, E.A.: Solving POMDPs by searching in policy space. In: Proc. of Uncertainty in Artificial Intelligence (1998b)"},{"key":"12_CR30","unstructured":"Hansen, E.A., Feng, Z.: Dynamic programming for POMDPs using a factored state representation. In: Int. Conf. on Artificial Intelligence Planning and Scheduling (2000)"},{"key":"12_CR31","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1613\/jair.678","volume":"13","author":"M. Hauskrecht","year":"2000","unstructured":"Hauskrecht, M.: Value function approximations for partially observable Markov decision processes. Journal of Artificial Intelligence Research\u00a013, 33\u201395 (2000)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"12_CR32","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1016\/S0933-3657(99)00042-1","volume":"18","author":"M. Hauskrecht","year":"2000","unstructured":"Hauskrecht, M., Fraser, H.: Planning treatment of ischemic heart disease with partially observable Markov decision processes. Artificial Intelligence in Medicine\u00a018, 221\u2013244 (2000)","journal-title":"Artificial Intelligence in Medicine"},{"issue":"8","key":"12_CR33","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S. Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Computation\u00a09(8), 1735\u20131780 (1997)","journal-title":"Neural Computation"},{"issue":"7","key":"12_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TPAMI.2007.1145","volume":"29","author":"J. Hoey","year":"2007","unstructured":"Hoey, J., Little, J.J.: Value-directed human behavior analysis from video using partially observable Markov decision processes. IEEE Transactions on Pattern Analysis and Machine Intelligence\u00a029(7), 1\u201315 (2007)","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"12_CR35","unstructured":"Hoey, J., Poupart, P.: Solving POMDPs with continuous or large discrete observation spaces. In: Proc. Int. Joint Conf. on Artificial Intelligence (2005)"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Hsiao, K., Kaelbling, L., Lozano-Perez, T.: Grasping pomdps. In: Proc. of the IEEE Int. Conf. on Robotics and Automation, pp. 4685\u20134692 (2007)","DOI":"10.1109\/ROBOT.2007.364201"},{"key":"12_CR37","unstructured":"Jaakkola, T., Singh, S.P., Jordan, M.I.: Reinforcement learning algorithm for partially observable Markov decision problems. In: Advances in Neural Information Processing Systems, vol.\u00a07 (1995)"},{"key":"12_CR38","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1007\/11564096_59","volume-title":"Machine Learning: ECML 2005","author":"R. Jaulmes","year":"2005","unstructured":"Jaulmes, R., Pineau, J., Precup, D.: Active Learning in Partially Observable Markov Decision Processes. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 601\u2013608. Springer, Heidelberg (2005)"},{"key":"12_CR39","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L.P. Kaelbling","year":"1998","unstructured":"Kaelbling, L.P., Littman, M.L., Cassandra, A.R.: Planning and acting in partially observable stochastic domains. Artificial Intelligence\u00a0101, 99\u2013134 (1998)","journal-title":"Artificial Intelligence"},{"key":"12_CR40","unstructured":"Kearns, M., Mansour, Y., Ng, A.Y.: Approximate planning in large POMDPs via reusable trajectories. In: Advances in Neural Information Processing Systems, vol.\u00a012. MIT Press (2000)"},{"key":"12_CR41","unstructured":"Koenig, S., Simmons, R.: Unsupervised learning of probabilistic models for robot navigation. In: Proc. of the IEEE Int. Conf. on Robotics and Automation (1996)"},{"key":"12_CR42","doi-asserted-by":"crossref","unstructured":"Kurniawati, H., Hsu, D., Lee, W.: SARSOP: Efficient point-based POMDP planning by approximating optimally reachable belief spaces. In: Robotics: Science and Systems (2008)","DOI":"10.15607\/RSS.2008.IV.009"},{"key":"12_CR43","unstructured":"Lin, L., Mitchell, T.: Memory approaches to reinforcement learning in non-Markovian domains. Tech. rep., Carnegie Mellon University, Pittsburgh, PA, USA (1992)"},{"issue":"1","key":"12_CR44","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1287\/ijoc.1020.0024","volume":"16","author":"Z.Z. Lin","year":"2004","unstructured":"Lin, Z.Z., Bean, J.C., White, C.C.: A hybrid genetic\/optimization algorithm for finite horizon, partially observed Markov decision processes. INFORMS Journal on Computing\u00a016(1), 27\u201338 (2004)","journal-title":"INFORMS Journal on Computing"},{"key":"12_CR45","first-page":"238","volume-title":"Proc. of the 3rd Int. Conf. on Simulation of Adaptive Behavior : from Animals to Animats 3","author":"M.L. Littman","year":"1994","unstructured":"Littman, M.L.: Memoryless policies: theoretical limitations and practical results. In: Proc. of the 3rd Int. Conf. on Simulation of Adaptive Behavior: from Animals to Animats 3, pp. 238\u2013245. MIT Press, Cambridge (1994)"},{"key":"12_CR46","unstructured":"Littman, M.L.: Algorithms for sequential decision making. PhD thesis, Brown University (1996)"},{"key":"12_CR47","doi-asserted-by":"crossref","unstructured":"Littman, M.L., Cassandra, A.R., Kaelbling, L.P.: Learning policies for partially observable environments: Scaling up. In: International Conference on Machine Learning (1995)","DOI":"10.1016\/B978-1-55860-377-6.50052-9"},{"key":"12_CR48","unstructured":"Littman, M.L., Sutton, R.S., Singh, S.: Predictive representations of state. In: Advances in Neural Information Processing Systems, vol.\u00a014. MIT Press (2002)"},{"key":"12_CR49","unstructured":"Loch, J., Singh, S.: Using eligibility traces to find the best memoryless policy in partially observable Markov decision processes. In: International Conference on Machine Learning (1998)"},{"issue":"1","key":"12_CR50","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1287\/opre.39.1.162","volume":"39","author":"W.S. Lovejoy","year":"1991","unstructured":"Lovejoy, W.S.: Computationally feasible bounds for partially observed Markov decision processes. Operations Research\u00a039(1), 162\u2013175 (1991)","journal-title":"Operations Research"},{"issue":"1-2","key":"12_CR51","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/S0004-3702(02)00378-8","volume":"147","author":"O. Madani","year":"2003","unstructured":"Madani, O., Hanks, S., Condon, A.: On the undecidability of probabilistic planning and related stochastic optimization problems. Artificial Intelligence\u00a0147(1-2), 5\u201334 (2003)","journal-title":"Artificial Intelligence"},{"key":"12_CR52","doi-asserted-by":"crossref","unstructured":"McCallum, R.A.: Overcoming incomplete perception with utile distinction memory. In: International Conference on Machine Learning (1993)","DOI":"10.1016\/B978-1-55860-307-3.50031-9"},{"key":"12_CR53","doi-asserted-by":"crossref","unstructured":"McCallum, R.A.: Instance-based utile distinctions for reinforcement learning with hidden state. In: International Conference on Machine Learning (1995)","DOI":"10.1016\/B978-1-55860-377-6.50055-4"},{"key":"12_CR54","unstructured":"McCallum, R.A.: Reinforcement learning with selective perception and hidden state. PhD thesis, University of Rochester (1996)"},{"key":"12_CR55","unstructured":"Meuleau, N., Kim, K.E., Kaelbling, L.P., Cassandra, A.R.: Solving POMDPs by searching the space of finite policies. In: Proc. of Uncertainty in Artificial Intelligence (1999a)"},{"key":"12_CR56","unstructured":"Meuleau, N., Peshkin, L., Kim, K.E., Kaelbling, L.P.: Learning finite-state controllers for partially observable environments. In: Proc. of Uncertainty in Artificial Intelligence (1999b)"},{"key":"12_CR57","doi-asserted-by":"crossref","unstructured":"Monahan, G.E.: A survey of partially observable Markov decision processes: theory, models and algorithms. Management Science\u00a028(1) (1982)","DOI":"10.1287\/mnsc.28.1.1"},{"key":"12_CR58","unstructured":"Ng, A.Y., Jordan, M.: PEGASUS: A policy search method for large MDPs and POMDPs. In: Proc. of Uncertainty in Artificial Intelligence (2000)"},{"key":"12_CR59","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1613\/jair.2447","volume":"32","author":"F.A. Oliehoek","year":"2008","unstructured":"Oliehoek, F.A., Spaan, M.T.J., Vlassis, N.: Optimal and approximate Q-value functions for decentralized POMDPs. Journal of Artificial Intelligence Research\u00a032, 289\u2013353 (2008)","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"3","key":"12_CR60","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1287\/moor.12.3.441","volume":"12","author":"C.H. Papadimitriou","year":"1987","unstructured":"Papadimitriou, C.H., Tsitsiklis, J.N.: The complexity of Markov decision processes. Mathematics of Operations Research\u00a012(3), 441\u2013450 (1987)","journal-title":"Mathematics of Operations Research"},{"key":"12_CR61","unstructured":"Parr, R., Russell, S.: Approximating optimal policies for partially observable stochastic domains. In: Proc. Int. Joint Conf. on Artificial Intelligence (1995)"},{"key":"12_CR62","doi-asserted-by":"crossref","unstructured":"Peters, J., Bagnell, J.A.D.: Policy gradient methods. In: Springer Encyclopedia of Machine Learning. Springer, Heidelberg (2010)","DOI":"10.1007\/978-0-387-30164-8_640"},{"key":"12_CR63","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Natural actor-critic. Neurocomputing\u00a071, 1180\u20131190 (2008)","journal-title":"Neurocomputing"},{"key":"12_CR64","unstructured":"Pineau, J., Thrun, S.: An integrated approach to hierarchy and abstraction for POMDPs. Tech. Rep. CMU-RI-TR-02-21, Robotics Institute, Carnegie Mellon University (2002)"},{"key":"12_CR65","unstructured":"Pineau, J., Gordon, G., Thrun, S.: Point-based value iteration: An anytime algorithm for POMDPs. In: Proc. Int. Joint Conf. on Artificial Intelligence (2003)"},{"key":"12_CR66","unstructured":"Platzman, L.K.: A feasible computational approach to infinite-horizon partially-observed Markov decision problems. Tech. Rep. J-81-2, School of Industrial and Systems Engineering, Georgia Institute of Technology, reprinted in working notes AAAI, Fall Symposium on Planning with POMDPs (1981)"},{"key":"12_CR67","unstructured":"Poon, K.M.: A fast heuristic algorithm for decision-theoretic planning. Master\u2019s thesis, The Hong-Kong University of Science and Technology (2001)"},{"key":"12_CR68","unstructured":"Porta, J.M., Spaan, M.T.J., Vlassis, N.: Robot planning in partially observable continuous domains. In: Robotics: Science and Systems (2005)"},{"key":"12_CR69","first-page":"2329","volume":"7","author":"J.M. Porta","year":"2006","unstructured":"Porta, J.M., Vlassis, N., Spaan, M.T.J., Poupart, P.: Point-based value iteration for continuous POMDPs. Journal of Machine Learning Research\u00a07, 2329\u20132367 (2006)","journal-title":"Journal of Machine Learning Research"},{"key":"12_CR70","unstructured":"Poupart, P.: Exploiting structure to efficiently solve large scale partially observable Markov decision processes. PhD thesis, University of Toronto (2005)"},{"key":"12_CR71","unstructured":"Poupart, P., Boutilier, C.: Bounded finite state controllers. In: Advances in Neural Information Processing Systems, vol.\u00a016. MIT Press (2004)"},{"key":"12_CR72","unstructured":"Poupart, P., Vlassis, N.: Model-based Bayesian reinforcement learning in partially observable domains. In: International Symposium on Artificial Intelligence and Mathematics, ISAIM (2008)"},{"key":"12_CR73","doi-asserted-by":"crossref","unstructured":"Poupart, P., Vlassis, N., Hoey, J., Regan, K.: An analytic solution to discrete Bayesian reinforcement learning. In: International Conference on Machine Learning (2006)","DOI":"10.1145\/1143844.1143932"},{"key":"12_CR74","unstructured":"Ross, S., Chaib-draa, B., Pineau, J.: Bayes-adaptive POMDPs. In: Advances in Neural Information Processing Systems, vol.\u00a020, pp. 1225\u20131232. MIT Press (2008a)"},{"key":"12_CR75","doi-asserted-by":"crossref","first-page":"664","DOI":"10.1613\/jair.2567","volume":"32","author":"S. Ross","year":"2008","unstructured":"Ross, S., Pineau, J., Paquet, S., Chaib-draa, B.: Online planning algorithms for POMDPs. Journal of Artificial Intelligence Research\u00a032, 664\u2013704 (2008b)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"12_CR76","unstructured":"Roy, N., Gordon, G.: Exponential family PCA for belief compression in POMDPs. In: Advances in Neural Information Processing Systems, vol.\u00a015. MIT Press (2003)"},{"key":"12_CR77","unstructured":"Roy, N., Thrun, S.: Coastal navigation with mobile robots. In: Advances in Neural Information Processing Systems, vol.\u00a012. MIT Press (2000)"},{"key":"12_CR78","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.artint.2005.06.002","volume":"23","author":"N. Roy","year":"2005","unstructured":"Roy, N., Gordon, G., Thrun, S.: Finding approximate POMDP solutions through belief compression. Journal of Artificial Intelligence Research\u00a023, 1\u201340 (2005)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"12_CR79","doi-asserted-by":"crossref","unstructured":"Sanner, S., Kersting, K.: Symbolic dynamic programming for first-order POMDPs. In: Proc. of the National Conference on Artificial Intelligence (2010)","DOI":"10.1609\/aaai.v24i1.7747"},{"issue":"1","key":"12_CR80","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1287\/mnsc.20.1.1","volume":"20","author":"J.K. Satia","year":"1973","unstructured":"Satia, J.K., Lave, R.E.: Markovian decision processes with probabilistic observation of states. Management Science\u00a020(1), 1\u201313 (1973)","journal-title":"Management Science"},{"key":"12_CR81","doi-asserted-by":"crossref","unstructured":"Seuken, S., Zilberstein, S.: Formal models and algorithms for decentralized decision making under uncertainty. Autonomous Agents and Multi-Agent Systems (2008)","DOI":"10.1007\/s10458-007-9026-5"},{"key":"12_CR82","first-page":"1249","volume-title":"Advances in Neural Information Processing Systems","author":"G. Shani","year":"2005","unstructured":"Shani, G., Brafman, R.I.: Resolving perceptual aliasing in the presence of noisy sensors. In: Saul, L.K., Weiss, Y., Bottou, L. (eds.) Advances in Neural Information Processing Systems, vol.\u00a017, pp. 1249\u20131256. MIT Press, Cambridge (2005)"},{"key":"12_CR83","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1007\/11564096_35","volume-title":"Machine Learning: ECML 2005","author":"G. Shani","year":"2005","unstructured":"Shani, G., Brafman, R.I., Shimony, S.E.: Model-Based Online Learning of POMDPs. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 353\u2013364. Springer, Heidelberg (2005)"},{"key":"12_CR84","unstructured":"Shani, G., Brafman, R.I., Shimony, S.E.: Forward search value iteration for POMDPs. In: Proc. Int. Joint Conf. on Artificial Intelligence (2007)"},{"key":"12_CR85","unstructured":"Shani, G., Poupart, P., Brafman, R.I., Shimony, S.E.: Efficient ADD operations for point-based algorithms. In: Int. Conf. on Automated Planning and Scheduling (2008)"},{"key":"12_CR86","unstructured":"Silver, D., Veness, J.: Monte-carlo planning in large POMDPs. In: Lafferty, J., Williams, C.K.I., Shawe-Taylor, J., Zemel, R., Culotta, A. (eds.) Advances in Neural Information Processing Systems, vol.\u00a023, pp. 2164\u20132172 (2010)"},{"key":"12_CR87","unstructured":"Simmons, R., Koenig, S.: Probabilistic robot navigation in partially observable environments. In: Proc. Int. Joint Conf. on Artificial Intelligence (1995)"},{"key":"12_CR88","doi-asserted-by":"crossref","unstructured":"Singh, S., Jaakkola, T., Jordan, M.: Learning without state-estimation in partially observable Markovian decision processes. In: International Conference on Machine Learning (1994)","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"key":"12_CR89","unstructured":"Singh, S., James, M.R., Rudary, M.R.: Predictive state representations: A new theory for modeling dynamical systems. In: Proc. of Uncertainty in Artificial Intelligence (2004)"},{"key":"12_CR90","doi-asserted-by":"publisher","first-page":"1071","DOI":"10.1287\/opre.21.5.1071","volume":"21","author":"R.D. Smallwood","year":"1973","unstructured":"Smallwood, R.D., Sondik, E.J.: The optimal control of partially observable Markov decision processes over a finite horizon. Operations Research\u00a021, 1071\u20131088 (1973)","journal-title":"Operations Research"},{"key":"12_CR91","unstructured":"Smith, T., Simmons, R.: Heuristic search value iteration for POMDPs. In: Proc. of Uncertainty in Artificial Intelligence (2004)"},{"key":"12_CR92","unstructured":"Smith, T., Simmons, R.: Point-based POMDP algorithms: Improved analysis and implementation. In: Proc. of Uncertainty in Artificial Intelligence (2005)"},{"key":"12_CR93","unstructured":"Sondik, E.J.: The optimal control of partially observable Markov processes. PhD thesis, Stanford University (1971)"},{"key":"12_CR94","doi-asserted-by":"crossref","unstructured":"Spaan, M.T.J., Vlassis, N.: A point-based POMDP algorithm for robot planning. In: Proc. of the IEEE Int. Conf. on Robotics and Automation (2004)","DOI":"10.1109\/ROBOT.2004.1307420"},{"key":"12_CR95","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1613\/jair.1659","volume":"24","author":"M.T.J. Spaan","year":"2005","unstructured":"Spaan, M.T.J., Vlassis, N.: Perseus: Randomized point-based value iteration for POMDPs. Journal of Artificial Intelligence Research\u00a024, 195\u2013220 (2005a)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"12_CR96","unstructured":"Spaan, M.T.J., Vlassis, N.: Planning with continuous actions in partially observable environments. In: Proc. of the IEEE Int. Conf. on Robotics and Automation (2005b)"},{"key":"12_CR97","doi-asserted-by":"crossref","unstructured":"Spaan, M.T.J., Veiga, T.S., Lima, P.U.: Active cooperative perception in network robot systems using POMDPs. In: Proc. of International Conference on Intelligent Robots and Systems (2010)","DOI":"10.1109\/IROS.2010.5648856"},{"key":"12_CR98","doi-asserted-by":"publisher","first-page":"704","DOI":"10.1016\/j.artint.2010.04.022","volume":"174","author":"M. Sridharan","year":"2010","unstructured":"Sridharan, M., Wyatt, J., Dearden, R.: Planning to see: A hierarchical approach to planning visual actions on a robot using POMDPs. Artificial Intelligence\u00a0174, 704\u2013725 (2010)","journal-title":"Artificial Intelligence"},{"issue":"6","key":"12_CR99","doi-asserted-by":"publisher","first-page":"970","DOI":"10.1109\/TSMCA.2007.897713","volume":"37","author":"B. Stankiewicz","year":"2007","unstructured":"Stankiewicz, B., Cassandra, A., McCabe, M., Weathers, W.: Development and evaluation of a Bayesian low-vision navigation aid. IEEE Transactions on Systems, Man and Cybernetics, Part A: Systems and Humans\u00a037(6), 970\u2013983 (2007)","journal-title":"IEEE Transactions on Systems, Man and Cybernetics, Part A: Systems and Humans"},{"issue":"2","key":"12_CR100","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1137\/1105015","volume":"5","author":"R.L. Stratonovich","year":"1960","unstructured":"Stratonovich, R.L.: Conditional Markov processes. Theory of Probability and Its Applications\u00a05(2), 156\u2013178 (1960)","journal-title":"Theory of Probability and Its Applications"},{"key":"12_CR101","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"12_CR102","unstructured":"Theocharous, G., Mahadevan, S.: Approximate planning with hierarchical partially observable Markov decision processes for robot navigation. In: Proc. of the IEEE Int. Conf. on Robotics and Automation (2002)"},{"key":"12_CR103","unstructured":"Thrun, S.: Monte Carlo POMDPs. In: Advances in Neural Information Processing Systems, vol.\u00a012. MIT Press (2000)"},{"key":"12_CR104","unstructured":"Thrun, S., Burgard, W., Fox, D.: Probabilistic Robotics. MIT Press (2005)"},{"key":"12_CR105","doi-asserted-by":"crossref","unstructured":"Varakantham, P., Maheswaran, R., Tambe, M.: Exploiting belief bounds: Practical POMDPs for personal assistant agents. In: Proc. of Int. Conference on Autonomous Agents and Multi Agent Systems (2005)","DOI":"10.1145\/1082473.1082621"},{"key":"12_CR106","doi-asserted-by":"crossref","unstructured":"Vlassis, N., Toussaint, M.: Model-free reinforcement learning as mixture learning. In: International Conference on Machine Learning, pp. 1081\u20131088. ACM (2009)","DOI":"10.1145\/1553374.1553512"},{"key":"12_CR107","doi-asserted-by":"crossref","unstructured":"Wang, C., Khardon, R.: Relational partially observable MDPs. In: Proc. of the National Conference on Artificial Intelligence (2010)","DOI":"10.1609\/aaai.v24i1.7742"},{"key":"12_CR108","doi-asserted-by":"crossref","unstructured":"White, C.C.: Partially observed Markov decision processes: a survey. Annals of Operations Research\u00a032 (1991)","DOI":"10.1007\/BF02204836"},{"issue":"2","key":"12_CR109","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1177\/105971239700600202","volume":"6","author":"M. Wiering","year":"1997","unstructured":"Wiering, M., Schmidhuber, J.: HQ-learning. Adaptive Behavior\u00a06(2), 219\u2013246 (1997)","journal-title":"Adaptive Behavior"},{"key":"12_CR110","doi-asserted-by":"crossref","unstructured":"Wierstra, D., Wiering, M.: Utile distinction hidden Markov models. In: International Conference on Machine Learning (2004)","DOI":"10.1145\/1015330.1015346"},{"issue":"2","key":"12_CR111","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1016\/j.csl.2006.06.008","volume":"21","author":"J.D. Williams","year":"2007","unstructured":"Williams, J.D., Young, S.: Partially observable Markov decision processes for spoken dialog systems. Computer Speech and Language\u00a021(2), 393\u2013422 (2007)","journal-title":"Computer Speech and Language"},{"key":"12_CR112","unstructured":"Williams, J.K., Singh, S.: Experimental results on learning stochastic memoryless policies for partially observable Markov decision processes. In: Advances in Neural Information Processing Systems, vol. 11 (1999)"},{"key":"12_CR113","unstructured":"Zhang, N.L., Liu, W.: Planning in stochastic domains: problem characteristics and approximations. Tech. Rep. HKUST-CS96-31, Department of Computer Science, The Hong Kong University of Science and Technology (1996)"},{"key":"12_CR114","unstructured":"Zhou, R., Hansen, E.A.: An improved grid-based approximation algorithm for POMDPs. In: Proc. Int. Joint Conf. on Artificial Intelligence (2001)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,21]],"date-time":"2024-04-21T03:18:04Z","timestamp":1713669484000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":114,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_12","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}