{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T15:35:56Z","timestamp":1767713756709},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2008,11,1]],"date-time":"2008-11-01T00:00:00Z","timestamp":1225497600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Discrete Event Dyn Syst"],"published-print":{"date-parts":[[2009,3]]},"DOI":"10.1007\/s10626-008-0055-2","type":"journal-article","created":{"date-parts":[[2008,10,31]],"date-time":"2008-10-31T12:10:31Z","timestamp":1225455031000},"page":"91-113","source":"Crossref","is-referenced-by-count":5,"title":["A New Learning Algorithm for Optimal Stopping"],"prefix":"10.1007","volume":"19","author":[{"given":"Vivek S.","family":"Borkar","sequence":"first","affiliation":[]},{"given":"Jervis","family":"Pinto","sequence":"additional","affiliation":[]},{"given":"Tarun","family":"Prabhu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2008,11,1]]},"reference":[{"key":"55_CR1","doi-asserted-by":"crossref","first-page":"489","DOI":"10.1287\/opre.1060.0291","volume":"54","author":"TKI Ahamed","year":"2006","unstructured":"Ahamed TKI, Borkar VS, Juneja S (2006) Adaptive importance sampling for Markov chains using stochastic approximation. Oper Res 54:489\u2013504","journal-title":"Oper Res"},{"key":"55_CR2","doi-asserted-by":"crossref","first-page":"1222","DOI":"10.1287\/mnsc.1040.0258","volume":"50","author":"L Andersen","year":"2004","unstructured":"Andersen L, Broadie M (2004) Primal-dual simulation algorithm for pricing multidimensional American options. Manage Sci 50:1222\u20131234","journal-title":"Manage Sci"},{"key":"55_CR3","doi-asserted-by":"crossref","first-page":"784","DOI":"10.1016\/j.sysconle.2008.03.003","volume":"57","author":"K Barman","year":"2008","unstructured":"Barman K, Borkar VS (2008) A note on linear function approximation using random projections. Syst Control Lett 57:784\u2013786","journal-title":"Syst Control Lett"},{"key":"55_CR4","volume-title":"Stochastic control by functional analysis methods","author":"A Bensoussan","year":"1982","unstructured":"Bensoussan A (1982) Stochastic control by functional analysis methods. North Holland, Amsterdam"},{"key":"55_CR5","volume-title":"Nonlinear programming","author":"DP Bertsekas","year":"1999","unstructured":"Bertsekas DP (1999) Nonlinear programming, 2nd edn. Athena Scientific, Belmont","edition":"2"},{"key":"55_CR6","volume-title":"Dynamic programming and optimal control, vol 1","author":"DP Bertsekas","year":"2005","unstructured":"Bertsekas DP (2005) Dynamic programming and optimal control, vol 1, 3rd edn. Athena Scientific, Belmont","edition":"3"},{"key":"55_CR7","volume-title":"Neuro-dynamic programming","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas DP, Tsitsiklis JN (1996) Neuro-dynamic programming. Athena Scientific, Belmont"},{"key":"55_CR8","first-page":"604","volume-title":"Proceedings of the 2004 winter simulation conference","author":"N Bolia","year":"2004","unstructured":"Bolia N, Glasserman P, Juneja S (2004) Function-approximation-based importance sampling for pricing American options. In: Proceedings of the 2004 winter simulation conference. IEEE, New York, pp 604\u2013611"},{"key":"55_CR9","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1016\/S0167-6911(97)90015-3","volume":"29","author":"VS Borkar","year":"1997","unstructured":"Borkar VS (1997) Stochastic approximation with two time scales. Syst Control Lett 29:291\u2013294","journal-title":"Syst Control Lett"},{"key":"55_CR10","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1016\/j.sysconle.2004.08.007","volume":"54","author":"VS Borkar","year":"2005","unstructured":"Borkar VS (2005) An actor-critic algorithm for constrained Markov decision processes. Syst Control Lett 54:207\u2013213","journal-title":"Syst Control Lett"},{"key":"55_CR11","doi-asserted-by":"crossref","DOI":"10.1007\/978-93-86279-38-5","volume-title":"Stochastic approximation: a dynamical systems view","author":"VS Borkar","year":"2008","unstructured":"Borkar VS (2008) Stochastic approximation: a dynamical systems view. Hindustan Publ. Co., New Delhi, India and Cambridge Uni. Press, Cambridge, UK"},{"key":"55_CR12","doi-asserted-by":"crossref","first-page":"447","DOI":"10.1137\/S0363012997331639","volume":"38","author":"VS Borkar","year":"2000","unstructured":"Borkar VS, Meyn SP (2000) The O.D.E. method for convergence of stochastic approximation and reinforcement learning. SIAM J Control Optim 38:447\u2013469","journal-title":"SIAM J Control Optim"},{"key":"55_CR13","doi-asserted-by":"crossref","first-page":"1965","DOI":"10.1137\/S0363012900377663","volume":"40","author":"MJ Cho","year":"2002","unstructured":"Cho MJ, Stockbridge RH (2002) Linear programming formulation for optimal stopping problems. SIAM J Control Optim 40:1965\u20131982","journal-title":"SIAM J Control Optim"},{"key":"55_CR14","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1007\/s10626-006-8134-8","volume":"16","author":"D Choi","year":"2006","unstructured":"Choi D, Van Roy B (2006) A generalized Kalman filter for fixed point approximation and efficient temporal difference learning. Disc Event Dyn Syst 16:207\u2013239","journal-title":"Disc Event Dyn Syst"},{"key":"55_CR15","doi-asserted-by":"crossref","first-page":"850","DOI":"10.1287\/opre.51.6.850.24925","volume":"51","author":"DP Farias De","year":"2003","unstructured":"De Farias DP, Van Roy B (2003) The linear programming approach to approximate dynamic programming. Oper Res 51:850\u2013865","journal-title":"Oper Res"},{"key":"55_CR16","doi-asserted-by":"crossref","first-page":"462","DOI":"10.1287\/moor.1040.0094","volume":"29","author":"DP Farias De","year":"2004","unstructured":"De Farias DP, Van Roy B (2004) On constraint sampling in the linear programming approach to approximate dynamic programming. Math Oper Res 29:462\u2013478","journal-title":"Math Oper Res"},{"key":"55_CR17","first-page":"238","volume":"150","author":"EB Dynkin","year":"1963","unstructured":"Dynkin EB (1963) The optimum choice of the instant of stopping a Markov process. Dokl Acad Nauk SSSR 150:238\u2013240 (in Russian; English translation in Sov Math Dokl 4:627\u2013629).","journal-title":"Sov Math Dokl"},{"key":"55_CR18","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-21617-1","volume-title":"Monte Carlo methods in financial engineering","author":"P Glasserman","year":"2003","unstructured":"Glasserman P (2003) Monte Carlo methods in financial engineering. Springer, New York"},{"key":"55_CR19","doi-asserted-by":"crossref","first-page":"258","DOI":"10.1287\/opre.1030.0070","volume":"52","author":"MB Haugh","year":"2004","unstructured":"Haugh MB, Kogan L (2004) Pricing American options: a duality approach. Oper Res 52:258\u2013270","journal-title":"Oper Res"},{"key":"55_CR20","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4612-0729-0","volume-title":"Discrete-time Markov control processes","author":"O Hern\u00e1ndez-Lerma","year":"1996","unstructured":"Hern\u00e1ndez-Lerma O, Lasserre J-B (1996) Discrete-time Markov control processes. Springer, New York"},{"key":"55_CR21","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1016\/0893-6080(89)90018-X","volume":"2","author":"MW Hirsch","year":"1989","unstructured":"Hirsch MW (1989) Convergent activation dynamics in continuous time networks. Neural Netw 2:331\u2013349","journal-title":"Neural Netw"},{"key":"55_CR22","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"VR Konda","year":"2003","unstructured":"Konda VR, Tsitsiklis JN (2003) On actor-critic algorithms. SIAM J Control Optim 42:1143\u20131166","journal-title":"SIAM J Control Optim"},{"key":"55_CR23","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4684-9352-8","volume-title":"Stochastic approximation for constrained and unconstrained systems","author":"HJ Kushner","year":"1978","unstructured":"Kushner HJ, Clark DS (1978) Stochastic approximation for constrained and unconstrained systems. Springer, New York"},{"key":"55_CR24","volume-title":"Optimization by vector space methods","author":"DG Luenberger","year":"1968","unstructured":"Luenberger DG (1968) Optimization by vector space methods. Wiley, New York"},{"key":"55_CR25","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1093\/rfs\/14.1.113","volume":"14","author":"FA Longstaff","year":"2001","unstructured":"Longstaff FA, Schwartz ES (2001) Valuing American options by simulation: a simple least-square approach. Rev Financ Stud 14:113\u2013147","journal-title":"Rev Financ Stud"},{"key":"55_CR26","volume-title":"Microeconomic theory","author":"A Mas-Colell","year":"1995","unstructured":"Mas-Colell A, Whinston MD, Green JR (1995) Microeconomic theory. Oxford University Press, Oxford"},{"key":"55_CR27","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1111\/1467-9965.02010","volume":"12","author":"LCG Rogers","year":"2002","unstructured":"Rogers LCG (2002) Monte Carlo valuations of American options. Math Finance 12:271\u2013286","journal-title":"Math Finance"},{"key":"55_CR28","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT, Cambridge"},{"key":"55_CR29","doi-asserted-by":"crossref","unstructured":"Szepesvari C, Smart WD (2004) Interpolation-based Q-learning. In: Proc. of the 21st intl. conf. on machine learning. Banff, Alberta, pp 100\u2013108","DOI":"10.1145\/1015330.1015445"},{"key":"55_CR30","doi-asserted-by":"crossref","first-page":"1840","DOI":"10.1109\/9.793723","volume":"44","author":"JN Tsitsiklis","year":"1999","unstructured":"Tsitsiklis JN, Van Roy B (1999) Optimal stopping of Markov processes: Hilbert space theory, approximation algorithms, and an application to pricing high-dimensional financial derivatives. IEEE Trans Autom Control 44:1840\u20131851","journal-title":"IEEE Trans Autom Control"},{"issue":"4","key":"55_CR31","doi-asserted-by":"crossref","first-page":"694","DOI":"10.1109\/72.935083","volume":"12","author":"JN Tsitsiklis","year":"2001","unstructured":"Tsitsiklis JN, Van Roy B (2001) Regression methods for pricing complex American-style options. IEEE Trans Neural Netw 12(4):694\u2013703 (special issue on computational finance)","journal-title":"IEEE Trans Neural Netw"},{"key":"55_CR32","volume-title":"A least squares Q-learning algorithm for optimal stopping problems","author":"H Yu","year":"2007","unstructured":"Yu H, Bertsekas DP (2007) A least squares Q-learning algorithm for optimal stopping problems. Lab. for Information and Decision Systems Report 2731. MIT, Cambridge"}],"container-title":["Discrete Event Dynamic Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10626-008-0055-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10626-008-0055-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10626-008-0055-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,30]],"date-time":"2019-05-30T15:58:52Z","timestamp":1559231932000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10626-008-0055-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,11,1]]},"references-count":32,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2009,3]]}},"alternative-id":["55"],"URL":"https:\/\/doi.org\/10.1007\/s10626-008-0055-2","relation":{},"ISSN":["0924-6703","1573-7594"],"issn-type":[{"value":"0924-6703","type":"print"},{"value":"1573-7594","type":"electronic"}],"subject":[],"published":{"date-parts":[[2008,11,1]]}}}