{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:51:33Z","timestamp":1775065893406,"version":"3.50.1"},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2012,1,19]],"date-time":"2012-01-19T00:00:00Z","timestamp":1326931200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Optim Theory Appl"],"published-print":{"date-parts":[[2012,6]]},"DOI":"10.1007\/s10957-012-9989-5","type":"journal-article","created":{"date-parts":[[2012,1,17]],"date-time":"2012-01-17T21:51:53Z","timestamp":1326837113000},"page":"688-708","source":"Crossref","is-referenced-by-count":43,"title":["An Online Actor\u2013Critic Algorithm with Function Approximation for Constrained Markov Decision Processes"],"prefix":"10.1007","volume":"153","author":[{"given":"Shalabh","family":"Bhatnagar","sequence":"first","affiliation":[]},{"given":"K.","family":"Lakshmanan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,1,19]]},"reference":[{"key":"9989_CR1","volume-title":"Constrained Markov Decision Processes","author":"E. Altman","year":"1999","unstructured":"Altman, E.: Constrained Markov Decision Processes. Chapman and Hall\/CRC Press, London (1999)"},{"key":"9989_CR2","volume-title":"Neuro-Dynamic Programming","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"9989_CR3","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"issue":"4","key":"9989_CR4","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V.R. Konda","year":"2003","unstructured":"Konda, V.R., Tsitsiklis, J.N.: On actor\u2013critic algorithms. SIAM J. Control Optim. 42(4), 1143\u20131166 (2003)","journal-title":"SIAM J. Control Optim."},{"key":"9989_CR5","doi-asserted-by":"crossref","first-page":"2471","DOI":"10.1016\/j.automatica.2009.07.008","volume":"45","author":"S. Bhatnagar","year":"2009","unstructured":"Bhatnagar, S., Sutton, R.S., Ghavamzadeh, M., Lee, M.: Natural actor\u2013critic algorithms. Automatica 45, 2471\u20132482 (2009)","journal-title":"Automatica"},{"key":"9989_CR6","doi-asserted-by":"crossref","first-page":"1799","DOI":"10.1016\/S0005-1098(99)00099-0","volume":"35","author":"J.N. Tsitsiklis","year":"1999","unstructured":"Tsitsiklis, J.N., Van Roy, B.: Average cost temporal-difference learning. Automatica 35, 1799\u20131808 (1999)","journal-title":"Automatica"},{"key":"9989_CR7","first-page":"1057","volume-title":"Advances in Neural Information Processing Systems (NIPS)","author":"R.S. Sutton","year":"2000","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems (NIPS), vol. 12, pp. 1057\u20131063. MIT Press, Cambridge (2000)"},{"key":"9989_CR8","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1109\/9.905687","volume":"46","author":"P. Marbach","year":"2001","unstructured":"Marbach, P., Tsitsiklis, J.N.: Simulation-based optimization of Markov reward processes. IEEE Trans. Autom. Control 46, 191\u2013209 (2001)","journal-title":"IEEE Trans. Autom. Control"},{"key":"9989_CR9","doi-asserted-by":"crossref","first-page":"1001","DOI":"10.1109\/TAC.1983.1103166","volume":"28","author":"A. Lazar","year":"1983","unstructured":"Lazar, A.: Optimal flow control of a class of queuing networks in equilibrium. IEEE Trans. Autom. Control 28, 1001\u20131007 (1983)","journal-title":"IEEE Trans. Autom. Control"},{"key":"9989_CR10","doi-asserted-by":"crossref","first-page":"760","DOI":"10.1016\/j.sysconle.2010.08.013","volume":"59","author":"S. Bhatnagar","year":"2010","unstructured":"Bhatnagar, S.: An actor\u2013critic algorithm with function approximation for discounted cost constrained Markov decision processes. Syst. Control Lett. 59, 760\u2013766 (2010)","journal-title":"Syst. Control Lett."},{"issue":"3","key":"9989_CR11","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1109\/9.119632","volume":"37","author":"J.C. Spall","year":"1992","unstructured":"Spall, J.C.: Multivariate stochastic approximation using a simultaneous perturbation gradient approximation. IEEE Trans. Autom. Control 37(3), 332\u2013341 (1992)","journal-title":"IEEE Trans. Autom. Control"},{"key":"9989_CR12","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1016\/j.sysconle.2004.08.007","volume":"54","author":"V.S. Borkar","year":"2005","unstructured":"Borkar, V.S.: An actor\u2013critic algorithm for constrained Markov decision processes. Syst. Control Lett. 54, 207\u2013213 (2005)","journal-title":"Syst. Control Lett."},{"key":"9989_CR13","volume-title":"An Introduction to Queueing Networks","author":"J. Walrand","year":"1988","unstructured":"Walrand, J.: An Introduction to Queueing Networks. Prentice Hall, New Jersey (1988)"},{"key":"9989_CR14","doi-asserted-by":"crossref","first-page":"472","DOI":"10.1016\/j.sysconle.2011.04.002","volume":"60","author":"S. Bhatnagar","year":"2011","unstructured":"Bhatnagar, S.: The Borkar-Meyn theorem for asynchronous stochastic approximations. Syst. Control Lett. 60, 472\u2013478 (2011)","journal-title":"Syst. Control Lett."},{"issue":"3","key":"9989_CR15","doi-asserted-by":"crossref","first-page":"840","DOI":"10.1137\/S0363012995282784","volume":"36","author":"V.S. Borkar","year":"1998","unstructured":"Borkar, V.S.: Asynchronous stochastic approximations. SIAM J. Control Optim. 36(3), 840\u2013851 (1998)","journal-title":"SIAM J. Control Optim."},{"key":"9989_CR16","doi-asserted-by":"crossref","DOI":"10.1007\/978-93-86279-38-5","volume-title":"Stochastic Approximation: A Dynamical Systems Viewpoint","author":"V.S. Borkar","year":"2008","unstructured":"Borkar, V.S.: Stochastic Approximation: A Dynamical Systems Viewpoint. Cambridge University Press and Hindustan Book Agency, Cambridge (2008)"},{"key":"9989_CR17","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-75894-2","volume-title":"Adaptive Algorithms and Stochastic Approximations","author":"A. Benveniste","year":"1990","unstructured":"Benveniste, A., M\u00e9tivier, M., Priouret, P.: Adaptive Algorithms and Stochastic Approximations. Springer, Berlin (1990)"},{"issue":"2","key":"9989_CR18","doi-asserted-by":"crossref","first-page":"447","DOI":"10.1137\/S0363012997331639","volume":"38","author":"V.S. Borkar","year":"2000","unstructured":"Borkar, V.S., Meyn, S.P.: The O.D.E. method for convergence of stochastic approximation and reinforcement learning. SIAM J. Control Optim. 38(2), 447\u2013469 (2000)","journal-title":"SIAM J. Control Optim."},{"key":"9989_CR19","doi-asserted-by":"crossref","first-page":"401","DOI":"10.2307\/3212261","volume":"5","author":"P.J. Schweitzer","year":"1968","unstructured":"Schweitzer, P.J.: Perturbation theory and finite Markov chains. J. Appl. Probab. 5, 401\u2013413 (1968)","journal-title":"J. Appl. Probab."},{"key":"9989_CR20","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1016\/0893-6080(89)90018-X","volume":"2","author":"M.W. Hirsch","year":"1989","unstructured":"Hirsch, M.W.: Convergent activation dynamics in continuous time networks. Neural Netw. 2, 331\u2013349 (1989)","journal-title":"Neural Netw."},{"key":"9989_CR21","volume-title":"Microeconomic Theory","author":"A. Mas-Colell","year":"1995","unstructured":"Mas-Colell, A., Whinston, M.D., Green, J.R.: Microeconomic Theory. Oxford University Press, Oxford (1995)"}],"container-title":["Journal of Optimization Theory and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10957-012-9989-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10957-012-9989-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10957-012-9989-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,22]],"date-time":"2019-06-22T12:25:10Z","timestamp":1561206310000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10957-012-9989-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,1,19]]},"references-count":21,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2012,6]]}},"alternative-id":["9989"],"URL":"https:\/\/doi.org\/10.1007\/s10957-012-9989-5","relation":{},"ISSN":["0022-3239","1573-2878"],"issn-type":[{"value":"0022-3239","type":"print"},{"value":"1573-2878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,1,19]]}}}