{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:14:01Z","timestamp":1780053241935,"version":"3.54.0"},"publisher-location":"Berlin, Heidelberg","reference-count":31,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540415978","type":"print"},{"value":"9783540445654","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2000]]},"DOI":"10.1007\/3-540-44565-x_12","type":"book-chapter","created":{"date-parts":[[2007,8,11]],"date-time":"2007-08-11T05:48:14Z","timestamp":1186811294000},"page":"264-287","source":"Crossref","is-referenced-by-count":15,"title":["Hidden-Mode Markov Decision Processes for Nonstationary Sequential Decision Making"],"prefix":"10.1007","author":[{"given":"Samuel P. M.","family":"Choi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dit-Yan","family":"Yeung","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nevin L.","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2001,12,7]]},"reference":[{"key":"12_CR1","volume-title":"Dynamic Programming","author":"R. E. Bellman","year":"1957","unstructured":"R. E. Bellman, (1957). Dynamic Programming. Princeton University Press, Princeton, NJ."},{"key":"12_CR2","first-page":"679","volume":"6","author":"R. E. Bellman","year":"1957","unstructured":"R. E. Bellman, (1957). A Markovian decision process. Journal of Mathematics and Mechanics, 6:679\u2013684.","journal-title":"Journal of Mathematics and Mechanics"},{"key":"12_CR3","first-page":"671","volume":"6","author":"J. A. Boyan","year":"1994","unstructured":"J. A. Boyan and M. L. Littman, (1994). Packet routing in dynamically changing networks: a reinforcement learning approach. In Advances in Neural Information Processing Systems 6, pages 671\u2013678, San Mateo, California. Morgan Kaugmann.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"12_CR4","unstructured":"A. R. Cassandra, M. L. Littman, and N. Zhang, (1997) Incremental pruning: A simple, fast, exact algorithm for partially observable Markov decision processes. In Uncertainty in Artificial Intelligence, Providence, RI."},{"key":"12_CR5","volume-title":"Algorithms for Partially Observable Markov Decision Processes","author":"H.-T. Cheng","year":"1988","unstructured":"H.-T. Cheng, (1988). Algorithms for Partially Observable Markov Decision Processes. PhD thesis, University of British Columbia, British Columbia, Canada."},{"key":"12_CR6","volume-title":"Reinforcement Learning in Nonstationary Environments","author":"S. P. M. Choi","year":"2000","unstructured":"S. P. M. Choi, (2000). Reinforcement Learning in Nonstationary Environments. PhD thesis, Hong Kong University of Science and Technology, Department of Computer Science, HKUST, Clear Water Bay, Hong Kong, China, Jan."},{"key":"12_CR7","unstructured":"S. P. M. Choi, D. Y. Yeung, and N. L. Zhang, (1999). An environment model for nonstationary reinforcement learning. In Advances in Neural Information Processing Systems 12. To appear."},{"key":"12_CR8","unstructured":"L. Chrisman, (1992). Reinforcement learning with perceptual aliasing: The perceptual distinctions approach. In AAAI-92."},{"key":"12_CR9","unstructured":"R. H. Crites and A. G. Barto, (1996). Improving elevator performance using reinforcement learning. In D. Touretzky, M. Mozer, and M. Hasselmo, editors, Advances in Neural Information Processing Systems 8."},{"issue":"1","key":"12_CR10","first-page":"5","volume":"25","author":"P. Dayan","year":"1996","unstructured":"P. Dayan and T. J. Sejnowski, (1996). Exploration bonuses and dual control. Machine Learning, 25(1):5\u201322, Oct.","journal-title":"Machine Learning"},{"key":"12_CR11","volume-title":"Advances in Neural Information Processing Systems","author":"T. Jaakkola","year":"1995","unstructured":"T. Jaakkola, S. P. Singh, and M. I. Jordan, (1995). Monte-Carlo reinforcement learning in non-Markovian decision problems. In G. Tesauro, D. S. Touretzky, and T. K. Leen, editors, Advances in Neural Information Processing Systems 7, MA. The MIT Press."},{"key":"12_CR12","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L. P. Kaelbling","year":"1996","unstructured":"L. P. Kaelbling, M. L. Littman, and A. W. Moore, (1996). Reinforcement learning: A survey. Journal of Artificial Intelligence Research, 4:237\u2013285, May.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"12_CR13","unstructured":"L. J. Lin and T. M. Mitchell, (1992). Memory approaches to reinforcement learning in non-Markovian domains. Technical Report CMU-CS-92-138, Carnegie Mellon University, School of Computer Science."},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"M. L. Littman, A. R. Cassandra, and L. P. Kaelbling, (1995a). Learning policies for partially observable environments: Scaling up. In A. Prieditis and S. Russell, editors, Proceedings of the Twelfth International Conference on Machine Learning, pages 362\u2013370, San Francisco, CA. Morgan Kaufmann.","DOI":"10.1016\/B978-1-55860-377-6.50052-9"},{"key":"12_CR15","series-title":"Technical Report","volume-title":"Efficient dynamicprogramming updates in partially observable Markov decision processes","author":"M. L. Littman","year":"1995","unstructured":"M. L. Littman, A. R. Cassandra, and L. P. Kaelbling, (1995b). Efficient dynamicprogramming updates in partially observable Markov decision processes. Technical Report TR CS-95-19, Department of Computer Science, Brown University, Providence, Rhode Island 02912, USA."},{"key":"12_CR16","first-page":"136","volume-title":"Proceedings of the Fourth International Conference on Genetic Algorithms","author":"M. L. Littman","year":"1991","unstructured":"M. L. Littman and D. H. Ackley, (1991). Adaptation in constant utility non-stationary environments. In R. K. Belew and L. Booker, editors, Proceedings of the Fourth International Conference on Genetic Algorithms, pages 136\u2013142, San Mateo, CA, Dec. Morgan Kaufmann."},{"key":"12_CR17","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/BF02055574","volume":"28","author":"W. S. Lovejoy","year":"1991","unstructured":"W. S. Lovejoy, (1991). A survey of algorithmic methods for partially observed Markov decision processes. Annals of Operations Research, 28:47\u201366.","journal-title":"Annals of Operations Research"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"A. McCallum, (1993). Overcoming incomplete perception with utile distinction memory. In Tenth International Machine Learning Conference, Amherst, MA.","DOI":"10.1016\/B978-1-55860-307-3.50031-9"},{"key":"12_CR19","unstructured":"A. McCallum, (1995). Reinforcement Learning with Selective Perception and Hidden State. PhD thesis, University of Rochester, Dec."},{"key":"12_CR20","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1287\/mnsc.28.1.1","volume":"28","author":"G. E. Monahan","year":"1982","unstructured":"G. E. Monahan, (1982). A survey of partially observable Markov decision processes: Theory, models and algorithms. Management Science, 28:1\u201316.","journal-title":"Management Science"},{"issue":"3","key":"12_CR21","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1287\/moor.12.3.441","volume":"12","author":"C. H. Papadimitriou","year":"1987","unstructured":"C. H. Papadimitriou and J. N. Tsitsiklis (1987). The complexity of Markov decision processes. Mathematics of Operations Research, 12(3):441\u2013450.","journal-title":"Mathematics of Operations Research"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"M. L. Puterman (1994). Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley and Sons.","DOI":"10.1002\/9780470316887"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"L. R. Rabiner, (1989). A tutorial on hidden Markov models and selected applications in speech recognition. Proceedings of the IEEE, 77(2), Feb.","DOI":"10.1109\/5.18626"},{"key":"12_CR24","unstructured":"J. H. Schmidhuber (1990). Reinforcement learning in Markovian and non-Markovian environments. In D. S. Lippman, J. E. Moody, and D. S. Touretzky, editors, Advances in Neural Information Processing Systems, volume 3, pages 500\u2013506, San Mateo, CA. Morgan Kaufmann."},{"key":"12_CR25","unstructured":"S. Singh and D. P. Bertsekas, (1997). Reinforcement learning for dynamic channel allocation in cellular telephone systems. In Advances in Neural Information Processing Systems 9, 1997."},{"key":"12_CR26","volume-title":"The Optimal Control of Partially Observable Markov Processes","author":"E. J. Sondik","year":"1971","unstructured":"E. J. Sondik, (1971). The Optimal Control of Partially Observable Markov Processes. PhD thesis, Stanford University, Stanford, California, USA."},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"R. S. Sutton, (1990). Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In Proceedings of the Seventh International Conference on Machine Learning, pages 216\u2013224. Morgan Kaufmann.","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"R. S. Sutton and A. G. Barto, (1998). Reinforcement Learning: An Introduction. The MIT Press.","DOI":"10.1109\/TNN.1998.712192"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"C. C. White III, (1991). Partially observed markov decision processes: A survey. Annals of Operations Research, 32.","DOI":"10.1007\/BF02204836"},{"key":"12_CR30","unstructured":"N. L. Zhang, S. S. Lee, and W. Zhang, (1999). A method for speeding up value iteration in partially observable markov decision processes. In Proceeding of 15th Conference on Uncertainties in Artificial Intelligence."},{"key":"12_CR31","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1613\/jair.419","volume":"7","author":"N. L. Zhang","year":"1997","unstructured":"N. L. Zhang and W. Liu, (1997). A model approximation scheme for planning in partially observable stochastic domains. Journal of Artificial Intelligence Research, 7:199\u2013230.","journal-title":"Journal of Artificial Intelligence Research"}],"container-title":["Lecture Notes in Computer Science","Sequence Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-44565-X_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T19:03:36Z","timestamp":1556737416000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-44565-X_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2000]]},"ISBN":["9783540415978","9783540445654"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/3-540-44565-x_12","relation":{},"ISSN":["0302-9743"],"issn-type":[{"value":"0302-9743","type":"print"}],"subject":[],"published":{"date-parts":[[2000]]}}}