{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T23:10:32Z","timestamp":1768432232945,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,3]],"date-time":"2022-10-03T00:00:00Z","timestamp":1664755200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2104880,2148309"],"award-info":[{"award-number":["2104880,2148309"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Department of Energy","award":["DE-EE0009341"],"award-info":[{"award-number":["DE-EE0009341"]}]},{"name":"SERB Grant","award":["SRG\/2021\/002308, PC 39010B"],"award-info":[{"award-number":["SRG\/2021\/002308, PC 39010B"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,3]]},"DOI":"10.1145\/3492866.3549726","type":"proceedings-article","created":{"date-parts":[[2022,9,21]],"date-time":"2022-09-21T16:34:33Z","timestamp":1663778073000},"page":"81-90","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Index-aware reinforcement learning for adaptive video streaming at the wireless edge"],"prefix":"10.1145","author":[{"given":"Guojun","family":"Xiong","sequence":"first","affiliation":[{"name":"SUNY-Binghamton University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xudong","family":"Qin","sequence":"additional","affiliation":[{"name":"Pennsylvania State University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Li","sequence":"additional","affiliation":[{"name":"Pennsylvania State University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rahul","family":"Singh","sequence":"additional","affiliation":[{"name":"Indian Institute of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jian","family":"Li","sequence":"additional","affiliation":[{"name":"SUNY-Binghamton University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,10,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230558"},{"key":"e_1_3_2_1_2_1","volume-title":"Constrained Markov Decision Processes","author":"Altman Eitan","unstructured":"Eitan Altman. 1999. Constrained Markov Decision Processes. Vol. 7. CRC Press."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"e_1_3_2_1_4_1","volume-title":"Whittle Index Based Q-learning for Restless Bandits with Average Reward. arXiv preprint arXiv:2004.14427","author":"Avrachenkov Konstantin","year":"2020","unstructured":"Konstantin Avrachenkov and Vivek S Borkar. 2020. Whittle Index Based Q-learning for Restless Bandits with Average Reward. arXiv preprint arXiv:2004.14427 (2020)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2016.2533496"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3323679.3326523"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2013.2254896"},{"key":"e_1_3_2_1_8_1","volume-title":"Exploration-Exploitation in Constrained MDPs. arXiv preprint arXiv:2003.02189","author":"Efroni Yonathan","year":"2020","unstructured":"Yonathan Efroni, Shie Mannor, and Matteo Pirotta. 2020. Exploration-Exploitation in Constrained MDPs. arXiv preprint arXiv:2003.02189 (2020)."},{"key":"e_1_3_2_1_9_1","volume-title":"Towards Q-Learning the Whittle Index for Restless Bandits. In 2019 Australian & New Zealand Control Conference (ANZCC). IEEE, 249--254","author":"Fu Jing","year":"2019","unstructured":"Jing Fu, Yoni Nazarathy, Sarat Moka, and Peter G Taylor. 2019. Towards Q-Learning the Whittle Index for Restless Bandits. In 2019 Australian & New Zealand Control Conference (ANZCC). IEEE, 249--254."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2013.112613.121033"},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. of COLT.","author":"Gopalan Aditya","year":"2015","unstructured":"Aditya Gopalan and Shie Mannor. 2015. Thompson Sampling for Learning Parameterized Markov Decision Processes. In Proc. of COLT."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2017.2756887"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1239\/aap\/1444308876"},{"key":"e_1_3_2_1_14_1","volume-title":"An Asymptotically Optimal Index Policy for Finite-Horizon Restless Bandits. arXiv preprint arXiv:1707.00205","author":"Hu Weici","year":"2017","unstructured":"Weici Hu and Peter Frazier. 2017. An Asymptotically Optimal Index Policy for Finite-Horizon Restless Bandits. arXiv preprint arXiv:1707.00205 (2017)."},{"key":"e_1_3_2_1_15_1","article-title":"Near-Optimal Regret Bounds for Reinforcement Learning","volume":"11","author":"Jaksch Thomas","year":"2010","unstructured":"Thomas Jaksch, Ronald Ortner, and Peter Auer. 2010. Near-Optimal Regret Bounds for Reinforcement Learning. Journal of Machine Learning Research 11, 4 (2010).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_16_1","volume-title":"Learning Adversarial MDPs with Bandit Feedback and Unknown Transition. arXiv preprint arXiv:1912.01192","author":"Jin Chi","year":"2019","unstructured":"Chi Jin, Tiancheng Jin, Haipeng Luo, Suvrit Sra, and Tiancheng Yu. 2019. Learning Adversarial MDPs with Bandit Feedback and Unknown Transition. arXiv preprint arXiv:1912.01192 (2019)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16979"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2685630"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.2995944"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155555.2155570"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2014.140405"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3098822.3098843"},{"key":"e_1_3_2_1_23_1","volume-title":"Empirical Bernstein Bounds and Sample Variance Penalization. arXiv preprint arXiv:0907.3740","author":"Maurer Andreas","year":"2009","unstructured":"Andreas Maurer and Massimiliano Pontil. 2009. Empirical Bernstein Bounds and Sample Variance Penalization. arXiv preprint arXiv:0907.3740 (2009)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11750-007-0025-0"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-34106-9_19"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/SCT.1994.315792"},{"key":"e_1_3_2_1_27_1","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman Martin L","unstructured":"Martin L Puterman. 1994. Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley & Sons."},{"key":"e_1_3_2_1_28_1","volume-title":"Proc. of ICML.","author":"Rosenberg Aviv","year":"2019","unstructured":"Aviv Rosenberg and Yishay Mansour. 2019. Online Convex Optimization in Adversarial Markov Decision Processes. In Proc. of ICML."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2011.71"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2020.2996964"},{"key":"e_1_3_2_1_31_1","volume-title":"Proc. of ACM MMSys.","author":"Stockhammer Thomas","year":"2011","unstructured":"Thomas Stockhammer. 2011. Dynamic Adaptive Streaming Over HTTP-Standards and Design Principles. In Proc. of ACM MMSys."},{"key":"e_1_3_2_1_32_1","volume-title":"Cisco Visual Networking Index: Global Mobile Data Traffic Forecast Update","author":"Systems Cisco","year":"2017","unstructured":"Cisco Systems. 2019. Cisco Visual Networking Index: Global Mobile Data Traffic Forecast Update, 2017--2022 White Paper. [Online.] Available: https:\/\/s3.amazonaws.com\/media.mediapost.com\/uploads\/CiscoForecast.pdf (2019)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.2980587"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2016.2601087"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1214\/15-AAP1137"},{"key":"e_1_3_2_1_36_1","volume-title":"Proc. of NeurIPS.","author":"Wang Siwei","year":"2020","unstructured":"Siwei Wang, Longbo Huang, and John Lui. 2020. Restless-UCB, an Efficient and Low-complexity Algorithm for Online Restless Bandits. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_37_1","volume-title":"On An Index Policy for Restless Bandits. Journal of Applied Probability","author":"Weber Richard R","year":"1990","unstructured":"Richard R Weber and Gideon Weiss. 1990. On An Index Policy for Restless Bandits. Journal of Applied Probability (1990), 637--648."},{"key":"e_1_3_2_1_38_1","volume-title":"Restless Bandits: Activity Allocation in A Changing World. Journal of Applied Probability","author":"Whittle Peter","year":"1988","unstructured":"Peter Whittle. 1988. Restless Bandits: Activity Allocation in A Changing World. Journal of Applied Probability (1988), 287--298."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20852"},{"key":"e_1_3_2_1_40_1","volume-title":"Model-free Reinforcement Learning for Content Caching at the Wireless Edge via Restless Bandits. arXiv preprint arXiv:2202.13187","author":"Xiong Guojun","year":"2022","unstructured":"Guojun Xiong, Shufan Wang, Jian Li, and Rahul Singh. 2022. Model-free Reinforcement Learning for Content Caching at the Wireless Edge via Restless Bandits. arXiv preprint arXiv:2202.13187 (2022)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796809"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787486"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1017\/apr.2019.29"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2522650"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466772.3467030"}],"event":{"name":"MobiHoc '22: The Twenty-third International Symposium on Theory, Algorithmic Foundations, and Protocol Design for Mobile Networks and Mobile Computing","location":"Seoul Republic of Korea","acronym":"MobiHoc '22","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing"]},"container-title":["Proceedings of the Twenty-Third International Symposium on Theory, Algorithmic Foundations, and Protocol Design for Mobile Networks and Mobile Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3492866.3549726","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3492866.3549726","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3492866.3549726","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:27Z","timestamp":1750193307000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3492866.3549726"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,3]]},"references-count":45,"alternative-id":["10.1145\/3492866.3549726","10.1145\/3492866"],"URL":"https:\/\/doi.org\/10.1145\/3492866.3549726","relation":{},"subject":[],"published":{"date-parts":[[2022,10,3]]},"assertion":[{"value":"2022-10-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}