{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T17:10:38Z","timestamp":1776273038055,"version":"3.50.1"},"reference-count":19,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010,9]]},"DOI":"10.1109\/allerton.2010.5707071","type":"proceedings-article","created":{"date-parts":[[2011,2,3]],"date-time":"2011-02-03T21:50:52Z","timestamp":1296769852000},"page":"1355-1359","source":"Crossref","is-referenced-by-count":7,"title":["Finite horizon Markov control with one-step variance penalties"],"prefix":"10.1109","author":[{"given":"Abhijit","family":"Gosavi","sequence":"first","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"77","article-title":"Portfolio selection","volume":"7","author":"markowitz","year":"1952","journal-title":"Journal of Finance"},{"key":"ref11","author":"bertsekas","year":"2000","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref12","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6911(97)90015-3"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1023\/B:MACH.0000019802.64038.6c"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997331639"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1287\/trsc.33.2.233"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1080\/07408170208928908"},{"key":"ref18","article-title":"A learning rate analysis of reinforcement learning algorithms in finite horizon","author":"garcia","year":"1998","journal-title":"Proceedings of the 15th International Conference on Machine Learning"},{"key":"ref19","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84628-690-2","author":"chang","year":"2007","journal-title":"Simulation-Based Algorithms for Markov Decision Processes"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.2307\/3213832"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1287\/moor.14.1.147"},{"key":"ref6","article-title":"Value iteration on two time-scales for variance-penalized Markov control","author":"gosavi","year":"2010","journal-title":"Proceedings of the 2010 Industrial Engineering Research Conference"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/BF00938524"},{"key":"ref8","article-title":"Learning from delayed rewards","author":"watkins","year":"1989","journal-title":"Ph D Dissertation"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2006.02.006"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1287\/moor.16.3.580"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01539805"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1080\/10429247.2009.11431841","volume":"21","author":"bahill","year":"2009","journal-title":"Engineering Management Journal"}],"event":{"name":"2010 48th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","location":"Monticello, IL, USA","start":{"date-parts":[[2010,9,29]]},"end":{"date-parts":[[2010,10,1]]}},"container-title":["2010 48th Annual Allerton Conference on Communication, Control, and Computing (Allerton)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5701578\/5706874\/05707071.pdf?arnumber=5707071","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,19]],"date-time":"2017-06-19T19:22:22Z","timestamp":1497900142000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5707071\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,9]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/allerton.2010.5707071","relation":{},"subject":[],"published":{"date-parts":[[2010,9]]}}}