{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T22:03:07Z","timestamp":1769810587010,"version":"3.49.0"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2003,1,1]],"date-time":"2003-01-01T00:00:00Z","timestamp":1041379200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2003,1,1]],"date-time":"2003-01-01T00:00:00Z","timestamp":1041379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Discrete Event Dynamic Systems"],"published-print":{"date-parts":[[2003,1]]},"DOI":"10.1023\/a:1022188803039","type":"journal-article","created":{"date-parts":[[2003,3,21]],"date-time":"2003-03-21T19:29:05Z","timestamp":1048274945000},"page":"9-39","source":"Crossref","is-referenced-by-count":36,"title":["From Perturbation Analysis to Markov Decision Processes and Reinforcement Learning"],"prefix":"10.1007","volume":"13","author":[{"given":"Xi-Ren","family":"Cao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"5110820_CR1","unstructured":"Abounadi, J., Bertsekas, D., and Borkar, V. Learning algorithms for Markov decision processes with average cost, Report LIDS-P-2434, Lab. for Info. and Decision Systems, October 1998; to appear in SIAM J. on Control and Optimization."},{"key":"5110820_CR2","unstructured":"Altman, E. 1999. Constrained Markov Decision Processes. Chapman Hall\/CRC."},{"key":"5110820_CR3","first-page":"835","volume":"13","author":"A. Barto","year":"1983","unstructured":"Barto, A., Sutton, R., and Anderson, C. 1983. Neuron-like elements that can solve difficult learning control problems. IEEE Transactions on Systems, Man, and Cybernetics13: 835\u2013846.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"5110820_CR4","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., and Bartlett, P. L. 2001. Innite-horizon policy-gradient estimation. Journal of Artificial Intelligence Research 15: 319\u2013350.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"5110820_CR5","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1613\/jair.807","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P. L., and Weaver, L. 2001. Experiments with innite-horizon policy-gradient estimation. Journal of Artificial Intelligence Research 15: 351\u2013381.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"5110820_CR6","volume-title":"Dynamic Programming and Optimal Control","author":"D. P. Bertsekas","year":"1995","unstructured":"Bertsekas, D. P. 1995. Dynamic Programming and Optimal Control. Vols. I, II, Athena Scientic, Belmont, Massachusetts."},{"key":"5110820_CR7","doi-asserted-by":"crossref","unstructured":"Berman, A., and Plemmons, R. J. 1994. Nonnegative matrices in the mathematical sciences. SIAM, Philadelphia.","DOI":"10.1137\/1.9781611971262"},{"key":"5110820_CR8","volume-title":"Neuro-Dynamic Programming","author":"D. P. Bertsekas","year":"1996","unstructured":"Bertsekas, D. P., and Tsitsiklis, T. N. 1996. Neuro-Dynamic Programming. Athena Scientic, Belmont, Massachusetts."},{"key":"5110820_CR9","volume-title":"The Dynamics of Queuing Systems","author":"X. R. Cao","year":"1994","unstructured":"Cao, X. R. 1994. Realization Probabilities: The Dynamics of Queuing Systems. Springer-Verlag, New York."},{"key":"5110820_CR10","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1023\/A:1008260528575","volume":"8","author":"X. R. Cao","year":"1998","unstructured":"Cao, X. R. 1998. The relation among potentials, perturbation analysis, Markov decision processes, and other topics. Journal of Discrete Event Dynamic Systems 8: 71\u201387.","journal-title":"Journal of Discrete Event Dynamic Systems"},{"key":"5110820_CR11","first-page":"527","volume":"100","author":"X. R. Cao","year":"1999","unstructured":"Cao, X. R. 1999. Single sample path-based optimization of markov chains. Journal of Optimization: Theory and Application 100: 527\u2013548.","journal-title":"Journal of Optimization: Theory and Application"},{"key":"5110820_CR12","doi-asserted-by":"crossref","first-page":"771","DOI":"10.1016\/S0005-1098(99)00207-1","volume":"36","author":"X. R. Cao","year":"2000","unstructured":"Cao, X. R. 2000. A unified approach to Markov decision problems and performance sensitivity analysis. Automatica36: 771\u2013774.","journal-title":"Automatica"},{"key":"5110820_CR13","doi-asserted-by":"crossref","first-page":"1382","DOI":"10.1109\/9.633827","volume":"42","author":"X. R. Cao","year":"1997","unstructured":"Cao, X. R., and Chen, H. F. 1997. Potentials, perturbation realization, and sensitivity analysis of Markov processes. IEEE Transactions on AC 42: 1382\u20131393.","journal-title":"IEEE Transactions on AC"},{"key":"5110820_CR14","doi-asserted-by":"crossref","first-page":"929","DOI":"10.1016\/S0005-1098(01)00282-5","volume":"38","author":"X. R. Cao","year":"2002","unstructured":"Cao, X. R., Ren, Z. Y., Bhatnagar, S., Fu, M., and Marcus, S. 2002. A time aggregation approach to Markov decision processes. Automatica 38: 929\u2013943.","journal-title":"Automatica"},{"key":"5110820_CR15","unstructured":"Cao, X. R., and Fang, H. T. Gradient-based policy iteration: an example. To appear in 2002 IEEE Conference on Decision and Control."},{"key":"5110820_CR16","doi-asserted-by":"crossref","first-page":"482","DOI":"10.1109\/87.701341","volume":"6","author":"X. R. Cao","year":"1998","unstructured":"Cao, X. R., and Wan, Y. W. 1998. Algorithms for sensitivity analysis of Markov systems through potentials and perturbation realization. IEEE Transactions on Control Systems Technology 6: 482\u2013494.","journal-title":"IEEE Transactions on Control Systems Technology"},{"key":"5110820_CR17","doi-asserted-by":"crossref","unstructured":"Cassandras, C. G., and Lafortune, S. 1999. Introduction to Discrete Event Systems. Kluwer Academic Publishers.","DOI":"10.1007\/978-1-4757-4070-7"},{"key":"5110820_CR18","first-page":"339","volume":"1","author":"E. K. P. Chong","year":"1992","unstructured":"Chong, E. K. P., and Ramadge, P. J. 1992. Convergence of recursive optimization algorithms using infinitesimal perturbation analysis estimates. Journal of Discrete Event Dynamic Systems1: 339\u2013372.","journal-title":"Journal of Discrete Event Dynamic Systems"},{"key":"5110820_CR19","volume-title":"Introduction to Stochastic Processes","author":"E. C\u00cbinlar","year":"1975","unstructured":"C\u00cbinlar, E. 1975. Introduction to Stochastic Processes. Prentice Hall, Englewood cliffs, NJ."},{"key":"5110820_CR20","unstructured":"Fang, H. T., and Cao, X. R. Single sample path-based recursive algorithms for Markov decision processes. IEEE Trans.on Automatic Control, submitted."},{"key":"5110820_CR21","doi-asserted-by":"crossref","unstructured":"Feinberg, E. A., and Adam Shwartz (ed.) 2002. Handbook of Markov Decision Processes. Kluwer, 2002.","DOI":"10.1007\/978-1-4615-0805-2"},{"key":"5110820_CR22","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1007\/BF00941166","volume":"65","author":"M. C. Fu","year":"1990","unstructured":"Fu, M. C. 1990. Convergence of a stochastic approximation algorithm for the GI\/G\/1 queue using infinitesimal perturbation analysis. Journal of Optimization Theory and Applications 65: 149\u2013160.","journal-title":"Journal of Optimization Theory and Applications"},{"key":"5110820_CR23","volume-title":"Conditional Monte Carlo: Gradient Estimation and Optimization Applications","author":"M. C. Fu","year":"1997","unstructured":"Fu, M. C. and Hu, J. Q. 1997. Conditional Monte Carlo: Gradient Estimation and Optimization Applications. Kluwer Academic Publishers, Boston."},{"key":"5110820_CR24","volume-title":"Gradient Estimation Via Perturbation Analysis","author":"P. Glasserman","year":"1991","unstructured":"Glasserman, P. 1991. Gradient Estimation Via Perturbation Analysis. Kluwer Academic Publishers, Boston."},{"key":"5110820_CR25","doi-asserted-by":"crossref","first-page":"916","DOI":"10.1214\/aop\/1039639370","volume":"24","author":"P. W. Glynn","year":"1996","unstructured":"Glynn, P. W., and Meyn, S. P. 1996. A Lyapunov bound for solutions of Poisson's equation. Ann.Probab. 24: 916\u2013931.","journal-title":"Ann. Probab"},{"key":"5110820_CR26","first-page":"858","volume":"32","author":"W. B. Gong","year":"1987","unstructured":"Gong, W. B., and Ho, Y. C. 1987. Smoothed perturbation analysis of discrete event systems. IEEE Transactions on Control Systems Technology 32: 858\u2013866.","journal-title":"IEEE Transactions on Control Systems Technology"},{"key":"5110820_CR27","volume-title":"Perturbation Analysis of Discrete-Event Dynamic Systems","author":"Y. C. Ho","year":"1991","unstructured":"Ho, Y. C., and Cao, X. R. 1991. Perturbation Analysis of Discrete-Event Dynamic Systems. Kluwer Academic Publisher, Boston."},{"issue":"4","key":"5110820_CR28","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1007\/BF00933971","volume":"40","author":"Y. C. Ho","year":"1983","unstructured":"Ho, Y. C., and Cao, X. R. 1983. Perturbation analysis and optimization of queuing networks. Journal of Optimization Theory and Applications 40(4): 559\u2013582.","journal-title":"Journal of Optimization Theory and Applications"},{"key":"5110820_CR29","doi-asserted-by":"crossref","unstructured":"Kaelbling, L. P., Littman, M. L., and Cassandra, A. R. 1998. Planning and acting in partially observable stochastic domains. Artificial Intelligence 101.","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"5110820_CR30","volume-title":"Finite Markov Chains","author":"J. G. Kemeny","year":"1960","unstructured":"Kemeny, J. G., and Snell, J. L. 1960. Finite Markov Chains. Van Nostrand, New York."},{"key":"5110820_CR31","doi-asserted-by":"crossref","first-page":"94","DOI":"10.1137\/S036301299731669X","volume":"38","author":"V. R. Konda","year":"1990","unstructured":"Konda, V. R., and Borkar, V. S. 1990. Actor-critic like learning algorithms for Markov decision processes. SIAM Journal on Control and Optimization 38: 94\u2013123.","journal-title":"SIAM Journal on Control and Optimization"},{"key":"5110820_CR32","unstructured":"Konda, V. R., and Tsitsiklis, J. N. 2001. Actor-critic Algorithms. Submitted to SIAM Journal on Control and Optimization, February."},{"key":"5110820_CR33","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1109\/9.905687","volume":"46","author":"P. Marbach","year":"2001","unstructured":"Marbach, P., and Tsitsiklis, T. N. 2001. Simulation-based optimization of Markov reward processes. IEEE Transactions on Automatic Control 46: 191\u2013209.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5110820_CR34","doi-asserted-by":"crossref","first-page":"1663","DOI":"10.1109\/9.650016","volume":"42","author":"S. P. Meyn","year":"1997","unstructured":"Meyn, S. P. 1997. The policy improvement algorithm for Markov decision processes with general state space. IEEE Transactions on Automatic Control 42: 1663\u20131680.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5110820_CR35","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-3267-7","volume-title":"Markov Chains and Stochastic Stability","author":"S. P. Meyn","year":"1993","unstructured":"Meyn, S. P., and Tweedie, R. L. 1993. Markov Chains and Stochastic Stability. Springer-Verlag, London."},{"key":"5110820_CR36","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M. L. Puterman","year":"1994","unstructured":"Puterman, M. L. 1994. Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, New York."},{"key":"5110820_CR37","unstructured":"Singh, S. P. 1994. Reinforcement learning algorithms for average-payoff Markovain decision processes. Proceedings of the Twelfth National Conference on Artificial Intelligence 202\u2013207."},{"key":"5110820_CR38","unstructured":"Smart, W. D., and Kaelbling, L. P. 2000. Practical reinforcement learning in continuous spaces. Proceedings of the Seventeenth International Conference on Machine Learning."},{"key":"5110820_CR39","doi-asserted-by":"crossref","first-page":"835","DOI":"10.1023\/A:1022633531479","volume":"3","author":"R. S. Sutton","year":"1988","unstructured":"Sutton, R. S. 1988. Learning to predict by the methods of temporal differences. Machine Learning 3: 835\u2013846.","journal-title":"Machine Learning"},{"key":"5110820_CR40","volume-title":"Reinforcement Learning: An Introduction","author":"R. S. Sutton","year":"1998","unstructured":"Sutton, R. S., and Barto, A. G. 1998. Reinforcement Learning: An Introduction. MIT Press, Cambridge, MA."},{"key":"5110820_CR41","doi-asserted-by":"crossref","first-page":"1799","DOI":"10.1016\/S0005-1098(99)00099-0","volume":"35","author":"J. N. Tsitsiklis","year":"1999","unstructured":"Tsitsiklis, J. N., and Van Roy, B. 1999. Average cost temporal-difference learning. Automatica 35: 1799\u20131808.","journal-title":"Automatica"},{"key":"5110820_CR42","doi-asserted-by":"crossref","first-page":"631","DOI":"10.1109\/9.668830","volume":"43","author":"F. J. Vazquez-Abad","year":"1998","unstructured":"Vazquez-Abad, F. J., Cassandras, C. G., and Julka, V. 1998. Centralized and decentralized asynchronous optimization of stochastic discret event systems. IEEE Transactions on Automatic Control 43: 631\u2013655.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5110820_CR43","doi-asserted-by":"crossref","first-page":"1218","DOI":"10.1109\/9.100931","volume":"36","author":"B. Zhang","year":"1991","unstructured":"Zhang, B., and Ho, Y. C. 1991. Performance gradient estimation for very large finite Markov chains. IEEE Transactions on Automatic Control 36: 1218\u20131227.","journal-title":"IEEE Transactions on Automatic Control"}],"container-title":["Discrete Event Dynamic Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1022188803039.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1022188803039\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1022188803039.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T03:59:35Z","timestamp":1753761575000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1022188803039"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003,1]]},"references-count":43,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2003,1]]}},"alternative-id":["5110820"],"URL":"https:\/\/doi.org\/10.1023\/a:1022188803039","relation":{},"ISSN":["0924-6703","1573-7594"],"issn-type":[{"value":"0924-6703","type":"print"},{"value":"1573-7594","type":"electronic"}],"subject":[],"published":{"date-parts":[[2003,1]]}}}