{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T16:52:33Z","timestamp":1762102353393,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,3,12]],"date-time":"2019-03-12T00:00:00Z","timestamp":1552348800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100002183","name":"Ministry of Electronics and Information technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002183","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Indo-French Centre for the Promotion of Advanced Research"},{"name":"DST, Government of India"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,3,12]]},"DOI":"10.1145\/3306309.3306321","type":"proceedings-article","created":{"date-parts":[[2019,2,25]],"date-time":"2019-02-25T13:32:31Z","timestamp":1551101551000},"page":"71-78","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["A Structure-aware Online Learning Algorithm for Markov Decision Processes"],"prefix":"10.1145","author":[{"given":"Arghyadip","family":"Roy","sequence":"first","affiliation":[{"name":"Dept. of Electrical Engineering, IIT Bombay, Mumbai, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vivek","family":"Borkar","sequence":"additional","affiliation":[{"name":"Dept. of Electrical Engineering, IIT Bombay, Mumbai, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abhay","family":"Karandikar","sequence":"additional","affiliation":[{"name":"Dept. of Electrical Engineering, IIT Bombay, IIT Kanpur Kanpur, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Prasanna","family":"Chaporkar","sequence":"additional","affiliation":[{"name":"Dept. of Electrical Engineering, IIT Bombay, Mumbai, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,3,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012999361974"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2008.925856"},{"key":"e_1_3_2_1_3_1","volume-title":"An actor-critic algorithm for constrained Markov decision processes. Systems & control letters 54, 3","author":"Borkar Vivek S","year":"2005","unstructured":"Vivek S Borkar . 2005. An actor-critic algorithm for constrained Markov decision processes. Systems & control letters 54, 3 ( 2005 ), 207--213. Vivek S Borkar. 2005. An actor-critic algorithm for constrained Markov decision processes. Systems & control letters 54, 3 (2005), 207--213."},{"volume-title":"Stochastic approximation: A dynamical systems viewpoint","author":"Borkar Vivek S","key":"e_1_3_2_1_4_1","unstructured":"Vivek S Borkar . 2008. Stochastic approximation: A dynamical systems viewpoint . Cambridge University Press . Vivek S Borkar. 2008. Stochastic approximation: A dynamical systems viewpoint. Cambridge University Press."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/INDIANCC.2018.8307959"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997331639"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11134-006-8307-z"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2012.2213850"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1137\/S036301299731669X"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1019177307418"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1287\/ijoc.1070.0240"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/9.905687"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2009.08.070350"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2009.2027735"},{"volume-title":"Approximate Dynamic Programming: Solving the curses of dimensionality","author":"Powell Warren B","key":"e_1_3_2_1_15_1","unstructured":"Warren B Powell . 2007. Approximate Dynamic Programming: Solving the curses of dimensionality . Vol. 703 . John Wiley & Sons . Warren B Powell. 2007. Approximate Dynamic Programming: Solving the curses of dimensionality. Vol. 703. John Wiley & Sons."},{"volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"Puterman Martin L","key":"e_1_3_2_1_16_1","unstructured":"Martin L Puterman . 2014. Markov decision processes: discrete stochastic dynamic programming . John Wiley & Sons . Martin L Puterman. 2014. Markov decision processes: discrete stochastic dynamic programming. John Wiley & Sons."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2008.080514"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/NCC.2012.6176889"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/2778192.2778196"},{"volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","key":"e_1_3_2_1_20_1","unstructured":"Richard S Sutton and Andrew G Barto . 1998. Reinforcement learning: An introduction . MIT press Cambridge . Richard S Sutton and Andrew G Barto. 1998. Reinforcement learning: An introduction. MIT press Cambridge."},{"key":"e_1_3_2_1_21_1","unstructured":"Richard S Sutton David A McAllester Satinder P Singh and Yishay Mansour. 2000. Policy gradient methods for reinforcement learning with function approximation. In Advances in neural information processing systems. 1057--1063.   Richard S Sutton David A McAllester Satinder P Singh and Yishay Mansour. 2000. Policy gradient methods for reinforcement learning with function approximation. In Advances in neural information processing systems. 1057--1063."},{"key":"e_1_3_2_1_22_1","volume-title":"Machine learning 8, 3--4","author":"Watkins Christopher JCH","year":"1992","unstructured":"Christopher JCH Watkins and Peter Dayan . 1992. Q-learning. Machine learning 8, 3--4 ( 1992 ), 279--292. Christopher JCH Watkins and Peter Dayan. 1992. Q-learning. Machine learning 8, 3--4 (1992), 279--292."}],"event":{"name":"VALUETOOLS 2019: 12th EAI International Conference on Performance Evaluation Methodologies and Tools","sponsor":["EAI The European Alliance for Innovation","Universitat de les Illes Balears Universitat de les Illes Balears"],"location":"Palma Spain","acronym":"VALUETOOLS 2019"},"container-title":["Proceedings of the 12th EAI International Conference on Performance Evaluation Methodologies and Tools"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3306309.3306321","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3306309.3306321","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:25:43Z","timestamp":1750206343000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3306309.3306321"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,3,12]]},"references-count":22,"alternative-id":["10.1145\/3306309.3306321","10.1145\/3306309"],"URL":"https:\/\/doi.org\/10.1145\/3306309.3306321","relation":{},"subject":[],"published":{"date-parts":[[2019,3,12]]},"assertion":[{"value":"2019-03-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}