{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T13:55:57Z","timestamp":1761486957233,"version":"3.41.2"},"reference-count":19,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2003,1,1]],"date-time":"2003-01-01T00:00:00Z","timestamp":1041379200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2003,1,1]],"date-time":"2003-01-01T00:00:00Z","timestamp":1041379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Discrete Event Dynamic Systems"],"published-print":{"date-parts":[[2003,1]]},"DOI":"10.1023\/a:1022145020786","type":"journal-article","created":{"date-parts":[[2003,3,21]],"date-time":"2003-03-21T19:29:05Z","timestamp":1048274945000},"page":"111-148","source":"Crossref","is-referenced-by-count":32,"title":["Approximate Gradient Methods in Policy-Space Optimization of Markov Reward Processes"],"prefix":"10.1007","volume":"13","author":[{"given":"Peter","family":"Marbach","sequence":"first","affiliation":[]},{"given":"John N.","family":"Tsitsiklis","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"5110823_CR1","unstructured":"Baxter, J., and Bartlett, P. L. 1999. Direct Gradient-Based Reinforcement Learning: I. Gradient Estimation Algorithms. Unpublished manuscript, November."},{"key":"5110823_CR2","volume-title":"Dynamic Programming and Optimal Control","author":"D. P. Bertsekas","year":"1995","unstructured":"Bertsekas, D. P. 1995. Dynamic Programming and Optimal Control, Vol. I and II. Belmont, MA: Athena Scientific."},{"key":"5110823_CR3","volume-title":"Nonlinear Programming","author":"D. P. Bertsekas","year":"1995","unstructured":"Bertsekas, D. P. 1995. Nonlinear Programming. Belmont, MA: Athena Scientific."},{"key":"5110823_CR4","doi-asserted-by":"crossref","first-page":"771","DOI":"10.1016\/S0005-1098(99)00207-1","volume":"36","author":"X. R. Cao","year":"2000","unstructured":"Cao, X. R. 2000. A unified approach to Markov decision problems and performance sensitivity analysis. Automatica 36: 771\u2013774.","journal-title":"Automatica"},{"key":"5110823_CR5","doi-asserted-by":"crossref","first-page":"1382","DOI":"10.1109\/9.633827","volume":"42","author":"X. R. Cao","year":"1997","unstructured":"Cao, X. R., and Chen, H. F. 1997. Perturbation realization, potentials, and sensitivity analysis of Markov processes. IEEE Transactions on Automatic Control 42: 1382\u20131393.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5110823_CR6","doi-asserted-by":"crossref","first-page":"1400","DOI":"10.1109\/9.299620","volume":"39","author":"E. K. P. Chong","year":"1994","unstructured":"Chong, E. K. P., and Ramadage, P. J. 1994. Stochastic optimization of regenerative systems using infinitesimal perturbation analysis. IEEE Trans. on Automatic Control 39: 1400\u20131410.","journal-title":"IEEE Trans. on Automatic Control"},{"key":"5110823_CR7","doi-asserted-by":"crossref","first-page":"482","DOI":"10.1109\/87.701341","volume":"6","author":"X. R. Cao","year":"1998","unstructured":"Cao, X. R., and Wan, Y. W. 1998. Algorithms for sensitivity analysis of Markov systems through potentials and perturbation realization. IEEE Trans. on Control Systems Technology 6: 482\u2013494.","journal-title":"IEEE Trans. on Control Systems Technology"},{"key":"5110823_CR8","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1016\/0167-6377(94)90084-1","volume":"15","author":"M. C. Fu","year":"1994","unstructured":"Fu, M. C., and Hu, J.-Q. 1994. Smoothed perturbation analysis derivative estimation for Markov chains. Operations Research Letters 15: 241\u2013251.","journal-title":"Operations Research Letters"},{"key":"5110823_CR9","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4615-6293-1","volume-title":"Conditional Monte Carlo: Gradient Estimation and Optimization Applications","author":"M. Fu","year":"1997","unstructured":"Fu, M. and Hu, J.-Q. 1997. Conditional Monte Carlo: Gradient Estimation and Optimization Applications. Boston, MA: Kluwer Academic Publishers."},{"key":"5110823_CR10","volume-title":"Discrete Stochastic Processes","author":"R. G. Gallager","year":"1995","unstructured":"Gallager, R. G. 1995. Discrete Stochastic Processes. Boston\/Dordrech\/London: Kluwer Academic Publishers."},{"key":"5110823_CR11","unstructured":"Glynn, P. W. 1986. Stochastic approximation for Monte Carlo optimization. Proceedings of the 1986 Winter Simulation Conference, pp. 285\u2013289."},{"key":"5110823_CR12","doi-asserted-by":"crossref","unstructured":"Glynn, P. W. 1987. Likelihood ratio gradient estimation: An overview. Proceedings of the 1987 Winter Simulation Conference, pp. 366\u2013375.","DOI":"10.1145\/318371.318612"},{"key":"5110823_CR13","first-page":"345","volume-title":"Advances in Neural Information Processing Systems","author":"T. Jaakkola","year":"1995","unstructured":"Jaakkola, T., Singh, S. P., and Jordan, M. I. 1995. Reinforcement learning algorithm for partially observable Markov decision problems. Advances in Neural Information Processing Systems. Vol. 7, San Francisco, CA: Morgan Kaufman, pp. 345\u2013352."},{"key":"5110823_CR14","unstructured":"Kimura, H., Miyazaki, K., and Kobayashi, S. (1997) Reinforcement learning in POMDPs with function approximation. In D. H. Fisher (editor), Proceedings of the 14th International Conference on Machine Learning, pp. 152\u2013160."},{"key":"5110823_CR15","unstructured":"Marbach, P. 1998. Simulation-based optimization of Markov decision processes. Ph.D. Thesis, Department of Electrical Engineering and Computer Science, Massachusetts Institute of Technology, MA."},{"issue":"2","key":"5110823_CR16","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1109\/9.905687","volume":"46","author":"P. Marbach","year":"2001","unstructured":"Marbach, P., and Tsitsiklis, J. N. (2001) Simulation-based optimization of Markov reward processes. IEEE Transactions on Automatic Control 46(2): 191\u2013209.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5110823_CR17","doi-asserted-by":"crossref","unstructured":"Marbach, P. and Tsitsiklis, J. N. (1999) Simulation-based optimization of Markov reward processes: Implementation issues. Proceedings of the 38th IEEE Conference on Decision and Control, Phoenix, Arizona, pp. 1769\u20131774, December.","DOI":"10.1109\/CDC.1999.830889"},{"key":"5110823_CR18","first-page":"1","volume-title":"Neural Networks for Signal Processing","author":"V. Tresp","year":"1995","unstructured":"Tresp, V., and Hofmann, R. 1995. Missing and Noisy Data in Nonlinear Time-Series Prediction. In Neural Networks for Signal Processing, S. F. Girosi, J. Mahoul, E. Manolakos and E. Wilson (editors), New York: IEEE Signal Processing Society, pp. 1\u201310."},{"key":"5110823_CR19","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1023\/A:1022672621406","volume":"8","author":"R. J. Williams","year":"1992","unstructured":"Williams, R. J. 1992. Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning 8: 229\u2013256.","journal-title":"Machine Learning"}],"container-title":["Discrete Event Dynamic Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1022145020786.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1022145020786\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1022145020786.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T04:00:11Z","timestamp":1753761611000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1022145020786"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003,1]]},"references-count":19,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2003,1]]}},"alternative-id":["5110823"],"URL":"https:\/\/doi.org\/10.1023\/a:1022145020786","relation":{},"ISSN":["0924-6703","1573-7594"],"issn-type":[{"type":"print","value":"0924-6703"},{"type":"electronic","value":"1573-7594"}],"subject":[],"published":{"date-parts":[[2003,1]]}}}