{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,15]],"date-time":"2025-12-15T13:44:57Z","timestamp":1765806297191,"version":"3.41.2"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[1999,5,1]],"date-time":"1999-05-01T00:00:00Z","timestamp":925516800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[1999,5,1]],"date-time":"1999-05-01T00:00:00Z","timestamp":925516800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Learning"],"published-print":{"date-parts":[[1999,5]]},"DOI":"10.1023\/a:1007541107674","type":"journal-article","created":{"date-parts":[[2002,12,22]],"date-time":"2002-12-22T05:04:10Z","timestamp":1040533450000},"page":"117-154","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":42,"title":["Exploration of Multi-State Environments: Local Measures and Back-Propagation of Uncertainty"],"prefix":"10.1007","volume":"35","author":[{"given":"Nicolas","family":"Meuleau","sequence":"first","affiliation":[]},{"given":"Paul","family":"Bourgine","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"207059_CR1","doi-asserted-by":"crossref","first-page":"1184","DOI":"10.1109\/TAC.1981.1102793","volume":"26","author":"Y. Bar-Shalom","year":"1981","unstructured":"Bar-Shalom, Y. (1981). Stochastic dynamic programming: Caution and probing. IEEE Trans. Automat. Control, 26, 1184\u20131195.","journal-title":"IEEE Trans. Automat. Control"},{"key":"207059_CR2","unstructured":"Barto, A.G., Bradtke, S.J., & Singh, S.P. (1991). Real-time learning and control using asynchronous dynamic programming. Technical Report 91-57, University of Massachusetts, Dept. of Computer Science."},{"key":"207059_CR3","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1016\/0004-3702(94)00011-O","volume":"72","author":"A.G. Barto","year":"1995","unstructured":"Barto, A.G., Bradtke, S.J., & Singh S.P. (1995). Learning to act using real-time dynamic programming. Artificial Intelligence, 72, 81\u2013138.","journal-title":"Artificial Intelligence"},{"key":"207059_CR4","doi-asserted-by":"crossref","DOI":"10.1007\/978-94-015-3711-7","volume-title":"Bandit problems: Sequential allocation of experiments","author":"D.A. Berry","year":"1985","unstructured":"Berry, D.A., & Fristedt, B. (1985). Bandit problems: Sequential allocation of experiments. London, England: Chapman and Hall."},{"key":"207059_CR5","doi-asserted-by":"crossref","first-page":"610","DOI":"10.1109\/TAC.1982.1102980","volume":"27","author":"D. Bertsekas","year":"1982","unstructured":"Bertsekas, D. (1982). Distributed dynamic programming. IEEE Trans. Automat. Control, 27, 610\u2013616.","journal-title":"IEEE Trans. Automat. Control"},{"key":"207059_CR6","volume-title":"Optimal control systems","author":"A. Feldbaum","year":"1965","unstructured":"Feldbaum, A. (1965). Optimal control systems. New-York, NY: Academic Press."},{"key":"207059_CR7","doi-asserted-by":"crossref","unstructured":"Fiechter, C.N. (1994). Efficient reinforcement learning. Proceedings of the Seventh Annual ACM Conference on Computational Learning Theory (pp. 88\u201397). 
New Brunswick, NJ.","DOI":"10.1145\/180139.181019"},{"key":"207059_CR8","volume-title":"Multi-armed bandit allocation indices","author":"J.C. Gittins","year":"1989","unstructured":"Gittins, J.C. (1989). Multi-armed bandit allocation indices. New-York, NY: John Wiley and Sons."},{"key":"207059_CR9","volume-title":"Dynamic programming and Markov processes","author":"R.A. Howard","year":"1960","unstructured":"Howard, R.A. (1960). Dynamic programming and Markov processes. Cambridge, MA: MIT Press."},{"key":"207059_CR10","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4168.001.0001","volume-title":"Learning in embedded systems","author":"L.P. Kaelbling","year":"1993","unstructured":"Kaelbling, L.P. (1993). Learning in embedded systems. Cambridge, MA: MIT Press."},{"key":"207059_CR11","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1023\/A:1018091703869","volume":"22","author":"L.P. Kaelbling","year":"1996","unstructured":"Kaelbling, L.P. (1996). Introduction to Machine Learning, 22, 7\u20139.","journal-title":"Introduction to Machine Learning"},{"key":"207059_CR12","doi-asserted-by":"crossref","unstructured":"Kaelbling, L.P., Littman, M.L., & Moore, A.W. (1996). Reinforcement learning: A survey. Journal of Artificial Intelligence Research, 4.","DOI":"10.1613\/jair.301"},{"key":"207059_CR13","doi-asserted-by":"crossref","first-page":"329","DOI":"10.1137\/0323023","volume":"23","author":"P.R. Kumar","year":"1985","unstructured":"Kumar, P.R. (1985). A survey of some results in stochastic adaptive control. SIAM Journal of Control and Optimization, 23, 329\u2013380.","journal-title":"SIAM Journal of Control and Optimization"},{"key":"207059_CR14","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1109\/TAC.1982.1102878","volume":"27","author":"P.R. Kumar","year":"1982","unstructured":"Kumar, P.R., & Becker, A. (1982). A new family of optimal adaptive controllers for Markov chains. IEEE Trans. Automat. Control, 27, 137\u2013145.","journal-title":"IEEE Trans. Automat. Control"},{"key":"207059_CR15","doi-asserted-by":"crossref","first-page":"765","DOI":"10.1109\/TAC.1982.1103017","volume":"27","author":"P.R. Kumar","year":"1982","unstructured":"Kumar, P.R., & Lin, W. (1982). Optimal adaptive controllers for unknown Markov chains. IEEE Trans. Automat. Control, 27, 765\u2013774.","journal-title":"IEEE Trans. Automat. Control"},{"key":"207059_CR16","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1023\/A:1018068507504","volume":"22","author":"S. Koenig","year":"1996","unstructured":"Koenig, S., & Simmons, R.G. (1996). The effect of representation and knowledge on goal-directed exploration. Machine Learning, 22, 227\u2013250.","journal-title":"Machine Learning"},{"key":"207059_CR17","volume-title":"An introduction to mathematical statistics and its applications","author":"R.J. Larsen","year":"1986","unstructured":"Larsen, R.J., & Marx, M.L. (1986). An introduction to mathematical statistics and its applications. Englewood Cliffs, NJ: Prentice-Hall."},{"key":"207059_CR18","first-page":"89","volume":"17","author":"S. Mahadevan","year":"1996","unstructured":"Mahadevan, S., & Kaelbling, L.P. (1996). The NSF workshop on reinforcement learning: Summary and observations. AI Magazine, 17, 89\u201393.","journal-title":"AI Magazine"},{"key":"207059_CR19","volume-title":"Bayesian decision problems and Markov chains","author":"J.J. Martin","year":"1967","unstructured":"Martin, J.J. (1967). Bayesian decision problems and Markov chains. 
New-York, NY: John Wiley and Sons."},{"key":"207059_CR20","volume-title":"Le dilemme entre exploration et explotation dans l'apprentissage par renforcement: Optimisation adaptative des mod\u00e8les de d\u00e9cision multi-\u00e9tats","author":"N. Meuleau","year":"1996","unstructured":"Meuleau, N. (1996). Le dilemme entre exploration et explotation dans l'apprentissage par renforcement: Optimisation adaptative des mod\u00e8les de d\u00e9cision multi-\u00e9tats. Ph.D. Thesis, Universit\u00e9 de Caen, Caen, France."},{"key":"207059_CR21","volume-title":"Efficient memory-based learning for robot control","author":"A.W. Moore","year":"1990","unstructured":"Moore, A.W. (1990). Efficient memory-based learning for robot control. Ph.D. Thesis, Trinity Hall, University of Cambridge, England."},{"key":"207059_CR22","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1023\/A:1022635613229","volume":"13","author":"A.W. Moore","year":"1993","unstructured":"Moore, A.W., & Atkeson, C.G. (1993). Prioritized sweeping: Reinforcement learning with less data and less time. Machine Learning, 13, 103\u2013130.","journal-title":"Machine Learning"},{"key":"207059_CR23","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov decision processes","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L. (1994). Markov decision processes. New-York, NY: John Wiley and Sons."},{"key":"207059_CR24","doi-asserted-by":"crossref","first-page":"502","DOI":"10.1109\/TAC.1982.1102893","volume":"27","author":"M. Sato","year":"1982","unstructured":"Sato, M., Abe, K., & Takeda, H. (1982). Learning control of finite Markov chains with unknown transition probabilities. IEEE Trans. Automat. Control, 27, 502\u2013505.","journal-title":"IEEE Trans. Automat. Control"},{"key":"207059_CR25","doi-asserted-by":"crossref","first-page":"1147","DOI":"10.1109\/TAC.1985.1103853","volume":"30","author":"M. Sato","year":"1985","unstructured":"Sato, M., Abe, K., & Takeda, H. (1985). An asymptotically optimal controller for finite Markov chains with unknown transition probabilities. IEEE Trans. Automat. Control, 30, 1147\u20131149.","journal-title":"IEEE Trans. Automat. Control"},{"key":"207059_CR26","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1109\/21.21595","volume":"18","author":"M. Sato","year":"1988","unstructured":"Sato, M., Abe, K., & Takeda, H. (1988). Learning control of finite Markov chains with an explicit trade-off between estimation and control. IEEE Trans. Systems, Man and Cybernetics, 18, 677\u2013684.","journal-title":"IEEE Trans. Systems, Man and Cybernetics"},{"key":"207059_CR27","first-page":"287","volume-title":"Proceedings of the 1990 Connectionist Models Summer School","author":"M. Sato","year":"1990","unstructured":"Sato, M., Abe, K., & Takeda, H. (1990). Learning control of finite Markov chains with an explicit trade-off between estimation and control. In D.S. Touretsky et al. (Eds.), Proceedings of the 1990 Connectionist Models Summer School (pp. 287\u2013300). San Mateo, CA: Morgan Kaufmann."},{"key":"207059_CR28","series-title":"Technical Report","volume-title":"Adaptive confidence and adaptive curiosity","author":"J. Schmidhuber","year":"1991","unstructured":"Schmidhuber, J. (1991a). Adaptive confidence and adaptive curiosity. Technical Report FKI-149-91, Technische Universitat M\u00fcnchen, M\u00fcnchen, Germany."},{"key":"207059_CR29","first-page":"1458","volume":"2","author":"J. Schmidhuber","year":"1991","unstructured":"Schmidhuber, J. (1991b). 
Curious model-building control systems. In Proceedings of International Joint Conference on Neural Networks, 2, 1458\u20131463. IEEE.","journal-title":"Proceedings of International Joint Conference on Neural Networks"},{"key":"207059_CR30","volume-title":"Statistical methods","author":"G.W. Snedecor","year":"1989","unstructured":"Snedecor, G.W., & Cochran, G. (1989). Statistical methods. New-York, NY: Mac Graw-Hill."},{"key":"207059_CR31","first-page":"216","volume-title":"Proceedings of the Seventh International Conference on Machine Learning","author":"R. Sutton","year":"1990","unstructured":"Sutton, R. (1990). Integrated architectures for learning, planning and reacting based on approximating dynamic programming. Proceedings of the Seventh International Conference on Machine Learning (pp. 216\u2013224). San-Mateo, CA: Morgan Kaufmann."},{"key":"207059_CR32","first-page":"471","volume-title":"Advances in neural information processing","author":"R. Sutton","year":"1991","unstructured":"Sutton, R. (1991a). Integrated modeling and control based on reinforcement learning and dynamic programming. In R.P. Lippmann et al. (Eds.), Advances in neural information processing 3, 471\u2013478. San-Mateo, CA: Morgan Kaufmann."},{"key":"207059_CR33","doi-asserted-by":"crossref","first-page":"160","DOI":"10.1145\/122344.122377","volume":"2","author":"R. Sutton","year":"1991","unstructured":"Sutton, R. (1991b). Dyna, an integrated architecture for learning, planning and reacting. SIGART Bulletin, 2, 160\u2013163.","journal-title":"SIGART Bulletin"},{"key":"207059_CR34","first-page":"1","volume":"8","author":"R. Sutton","year":"1992","unstructured":"Sutton, R. (1992). The challenge of reinforcement learning. Introduction to Machine Learning, 8, 1\u20133.","journal-title":"Introduction to Machine Learning"},{"key":"207059_CR35","doi-asserted-by":"crossref","unstructured":"Sutton, R., Barto, A.G., & Williams, R.J. (1991). Reinforcement learning is direct adaptive optimal control. Proceedings of the 1991 American Control Conference (pp. 2143\u20132146). Boston, MA.","DOI":"10.23919\/ACC.1991.4791776"},{"key":"207059_CR36","series-title":"Technical Report","volume-title":"Efficient exploration in reinforcement learning","author":"S. Thrun","year":"1992","unstructured":"Thrun, S. (1992a). Efficient exploration in reinforcement learning. Technical Report CS-92-102, Carnegie Mellon University, Pittsburgh, PA."},{"key":"207059_CR37","volume-title":"Handbook of intelligent control: Neural, fuzzy and adaptive approaches","author":"S. Thrun","year":"1992","unstructured":"Thrun, S. (1992b). The role of exploration in learning control. In D.A. White & D.A. Sofge (Eds.), Handbook of intelligent control: Neural, fuzzy and adaptive approaches. NY: Van Nostrand Reinhold."},{"key":"207059_CR38","series-title":"Technical Report","volume-title":"On planning and exploration in non-discrete environments","author":"S. Thrun","year":"1991","unstructured":"Thrun, S., & M\u00f6ller, K. (1991). On planning and exploration in non-discrete environments. Technical Report 528, GMD, Sankt Augustin, Germany."},{"key":"207059_CR39","volume-title":"Advances in neural information processing","author":"S. Thrun","year":"1992","unstructured":"Thrun, S., & M\u00f6ller, K. (1992). Active exploration in dynamic environments. In J.E. Moody et al. (Eds.), Advances in neural information processing 4. San-Mateo, CA: Morgan Kaufmann."},{"key":"207059_CR40","first-page":"185","volume":"16","author":"J.N. 
Tsitsiklis","year":"1994","unstructured":"Tsitsiklis, J.N. (1994). Asynchronous stochastic approximations and Q-learning. Machine Learning, 16, 185\u2013202.","journal-title":"Machine Learning"},{"key":"207059_CR41","volume-title":"Learning from delayed reward","author":"C. Watkins","year":"1989","unstructured":"Watkins, C. (1989). Learning from delayed reward. Ph.D. Thesis, University of Cambridge, Cambridge, England."},{"key":"207059_CR42","first-page":"279","volume":"8","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., & Dayan, P. (1992). Technical note: Q-learning. Machine Learning, 8, 279\u2013292.","journal-title":"Machine Learning"},{"key":"207059_CR43","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1023\/A:1022619109594","volume":"7","author":"S.D. Whitehead","year":"1991","unstructured":"Whitehead, S.D., & Ballard, D.H. (1991). Learning to perceive and act by trial and error. Machine Learning, 7, 45\u201383.","journal-title":"Machine Learning"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1007541107674.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1007541107674\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1007541107674.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T11:39:34Z","timestamp":1752147574000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1007541107674"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1999,5]]},"references-count":43,"journal-issue":{"issue":"2","published-print":{"date-parts":[[1999,5]]}},"alternative-id":["207059"],"URL":"https:\/\/doi.org\/10.1023\/a:1007541107674","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[1999,5]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}