{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T14:30:01Z","timestamp":1759847401775},"reference-count":12,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2009,7,28]],"date-time":"2009-07-28T00:00:00Z","timestamp":1248739200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Discrete Event Dyn Syst"],"published-print":{"date-parts":[[2010,3]]},"DOI":"10.1007\/s10626-009-0078-3","type":"journal-article","created":{"date-parts":[[2009,7,27]],"date-time":"2009-07-27T09:25:58Z","timestamp":1248686758000},"page":"3-17","source":"Crossref","is-referenced-by-count":4,"title":["On-Line Policy Gradient Estimation with Multi-Step Sampling"],"prefix":"10.1007","volume":"20","author":[{"given":"Yan-Jie","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fang","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xi-Ren","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2009,7,28]]},"reference":[{"key":"78_CR1","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J Baxter","year":"2001","unstructured":"Baxter J, Bartlett PL (2001) Infinite-horizon policy-gradient estimation. J Artif Intell Res 15:319\u2013350","journal-title":"J Artif Intell Res"},{"key":"78_CR2","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1613\/jair.807","volume":"15","author":"J Baxter","year":"2001","unstructured":"Baxter J, Bartlett PL, Weaver L (2001) Experiments with infinite-horizon policy-gradient estimation. J Artif Intell Res 15:351\u2013381","journal-title":"J Artif Intell Res"},{"key":"78_CR3","volume-title":"Dynamic programming and optimal control, vols I and II","author":"DP Bertsekas","year":"1995","unstructured":"Bertsekas DP (1995) Dynamic programming and optimal control, vols I and II. Athena Scientific, Belmont"},{"issue":"5","key":"78_CR4","doi-asserted-by":"crossref","first-page":"696","DOI":"10.1109\/TAC.2005.847037","volume":"50","author":"XR Cao","year":"2005","unstructured":"Cao XR (2005) A basic formula for online policy gradient algorithms. IEEE Trans Automat Contr 50(5):696\u2013699","journal-title":"IEEE Trans Automat Contr"},{"key":"78_CR5","volume-title":"Stochastic learning and optimization: a sensitivity-based approach","author":"XR Cao","year":"2007","unstructured":"Cao XR (2007) Stochastic learning and optimization: a sensitivity-based approach. Springer, New York"},{"issue":"10","key":"78_CR6","doi-asserted-by":"crossref","first-page":"1382","DOI":"10.1109\/9.633827","volume":"42","author":"XR Cao","year":"1997","unstructured":"Cao XR, Chen HF (1997) Perturbation realization, potentials and sensitivity analysis of Markov processes. IEEE Trans Automat Contr 42(10):1382\u20131393","journal-title":"IEEE Trans Automat Contr"},{"issue":"4","key":"78_CR7","doi-asserted-by":"crossref","first-page":"482","DOI":"10.1109\/87.701341","volume":"6","author":"XR Cao","year":"1998","unstructured":"Cao XR, Wan YW (1998) Algorithms for sensitivity analysis of Markov systems through potentials and perturbation realization. IEEE Trans Control Syst Technol 6(4):482\u2013494","journal-title":"IEEE Trans Control Syst Technol"},{"key":"78_CR8","volume-title":"Introduction to stochastic processes","author":"E Cinlar","year":"1975","unstructured":"Cinlar E (1975) Introduction to stochastic processes. Prentice Hall, Englewood Cliffs"},{"key":"78_CR9","volume-title":"Introduction to algorithms","author":"TH Cormen","year":"2001","unstructured":"Cormen TH, Leiserson CE, Rivest RL, Stein C (2001) Introduction to algorithms, 2nd edn. MIT and McGraw-Hill, Cambridge","edition":"2"},{"key":"78_CR10","first-page":"1471","volume":"5","author":"E Greensmith","year":"2004","unstructured":"Greensmith E, Bartlett PL, Baxter J (2004) Variance reduction techniques for gradient estimates in reinforcement learning. J Mach Learn Res 5:1471\u20131530","journal-title":"J Mach Learn Res"},{"issue":"2","key":"78_CR11","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1109\/9.905687","volume":"46","author":"P Marbach","year":"2001","unstructured":"Marbach P, Tsitsiklis JN (2001) Simulation-based optimization of Markov reward processes. IEEE Trans Automat Contr 46(2):191\u2013209","journal-title":"IEEE Trans Automat Contr"},{"key":"78_CR12","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"ML Puterman","year":"1994","unstructured":"Puterman ML (1994) Markov decision processes: discrete stochastic dynamic programming. Wiley, New York"}],"container-title":["Discrete Event Dynamic Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10626-009-0078-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10626-009-0078-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10626-009-0078-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,30]],"date-time":"2019-05-30T19:58:52Z","timestamp":1559246332000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10626-009-0078-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,7,28]]},"references-count":12,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2010,3]]}},"alternative-id":["78"],"URL":"https:\/\/doi.org\/10.1007\/s10626-009-0078-3","relation":{},"ISSN":["0924-6703","1573-7594"],"issn-type":[{"value":"0924-6703","type":"print"},{"value":"1573-7594","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,7,28]]}}}