{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T05:23:50Z","timestamp":1778045030453,"version":"3.51.4"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1109\/allerton.2019.8919665","type":"proceedings-article","created":{"date-parts":[[2019,12,6]],"date-time":"2019-12-06T04:23:39Z","timestamp":1575606219000},"page":"663-670","source":"Crossref","is-referenced-by-count":19,"title":["Reinforcement Learning for Optimal Control of Queueing Systems"],"prefix":"10.1109","author":[{"given":"Bai","family":"Liu","sequence":"first","affiliation":[{"name":"Massachusetts Institute of Technology,Laboratory for Information and Decision Systems,Cambridge,MA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiaomin","family":"Xie","sequence":"additional","affiliation":[{"name":"Cornell University,School of Operations Research and Information Engineering,Ithaca,NY"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eytan","family":"Modiano","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology,Laboratory for Information and Decision Systems,Cambridge,MA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","volume":"1","author":"bertsekas","year":"2017","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref32","author":"weissman","year":"2003","journal-title":"Inequalities for the L1 deviation of the empirical distribution"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2006.890695"},{"key":"ref30","author":"bertsimas","year":"1998","journal-title":"Geometric Bounds for Stationary Distributions of Infinite Markov Chains Via Lyapunov Functions"},{"key":"ref35","author":"hou","year":"2012","journal-title":"Homogeneous Denumerable Markov Processes"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-3124-8_5"},{"key":"ref10","article-title":"A distributed algorithm for throughput optimal routing in overlay networks","author":"rai","year":"2016","journal-title":"arXiv preprint arXiv 1612 05537"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISSNIP.2010.5706760"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2016.2525518"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737528"},{"key":"ref14","article-title":"Posterior sampling for large scale reinforcement learning","author":"theocharous","year":"2017","journal-title":"arXiv preprint arXiv 1711 03890"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2017.8254101"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCCN.2018.2809722"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/18.212277"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6911(85)90037-4"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.2307\/1427064"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2015.2448107"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2013.2246869"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2007.900405"},{"key":"ref6","first-page":"1563","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"11","author":"jaksch","year":"2010","journal-title":"Journal of Machine Learning Research"},{"key":"ref29","author":"meyn","year":"2012","journal-title":"Markov Chains and Stochastic Stability"},{"key":"ref5","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"2554","DOI":"10.1109\/INFCOM.2005.1498540","article-title":"Topology aware overlay networks","volume":"4","author":"han","year":"2005","journal-title":"Proceedings IEEE 24th Annual Joint Conference of the IEEE Computer and Communications Societies"},{"key":"ref7","first-page":"3003","article-title":"(more) efficient reinforcement learning via posterior sampling","author":"osband","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1990.204000"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2003.818782"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/9781118909690.ch16"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1214\/aoap\/1177005200"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/9.310033"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1287\/moor.22.4.921"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1214\/aoap\/1015345407"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/9.341782"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1287\/opre.1040.0170"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2010.5717235"}],"event":{"name":"2019 57th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","location":"Monticello, IL, USA","start":{"date-parts":[[2019,9,24]]},"end":{"date-parts":[[2019,9,27]]}},"container-title":["2019 57th Annual Allerton Conference on Communication, Control, and Computing (Allerton)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8911248\/8919648\/08919665.pdf?arnumber=8919665","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T19:51:45Z","timestamp":1774986705000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8919665\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,9]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/allerton.2019.8919665","relation":{},"subject":[],"published":{"date-parts":[[2019,9]]}}}