{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:28:45Z","timestamp":1750220925057,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,5,17]],"date-time":"2019-05-17T00:00:00Z","timestamp":1558051200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,5,17]]},"DOI":"10.1145\/3321408.3323078","type":"proceedings-article","created":{"date-parts":[[2019,7,19]],"date-time":"2019-07-19T13:17:21Z","timestamp":1563542241000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Second-order multi-armed bandit learning for online optimization in communication and networks"],"prefix":"10.1145","author":[{"given":"Zhiyong","family":"Du","sequence":"first","affiliation":[{"name":"National University of Defense Technology, China"}]},{"given":"Bin","family":"Jiang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, China"}]},{"given":"Kun","family":"Xu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, China"}]},{"given":"Shengyun","family":"Wei","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, China"}]},{"given":"Shengqing","family":"Wang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, China"}]},{"given":"Huatao","family":"Zhu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, China"}]}],"member":"320","published-online":{"date-parts":[[2019,5,17]]},"reference":[{"volume-title":"Using Confidence Bounds for Exploitation-exploration Trade-offs. J. Mach. Learn. Res. 3 (March","year":"2003","author":"Auer P.","key":"e_1_3_2_1_1_1"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2014.2363139"},{"key":"e_1_3_2_1_4_1","unstructured":"Z. Du et al. 2019. Second-Order Reinforcement Learning for End-to-End Path Selection with QoE Dynamics. https:\/\/arxiv.org\/ (2019).  Z. Du et al. 2019. Second-Order Reinforcement Learning for End-to-End Path Selection with QoE Dynamics. https:\/\/arxiv.org\/ (2019)."},{"volume-title":"Decentralized Online Learning Algorithms for Opportunistic Spectrum Access. In 2011 IEEE Global Telecommunications Conference - GLOBECOM","year":"2011","author":"Gai Y.","key":"e_1_3_2_1_5_1"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAP.2013.2276414"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2018.2856302"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2727878"},{"volume-title":"2011 Information Theory and Applications Workshop. 1--7.","author":"Liu H.","key":"e_1_3_2_1_9_1"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2010.2062509"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2018.2798164"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"R. S. Sutton and A. G. Barto. 1998. Reinforcement learning: An Introduction. MIT Press.   R. S. Sutton and A. G. Barto. 1998. Reinforcement learning: An Introduction. MIT Press.","DOI":"10.1109\/TNN.1998.712192"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(85)90002-8"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2012.2198613"},{"volume-title":"Online Learning in Opportunistic Spectrum Access: A Restless Bandit Approach. In IEEE International Conference on Computer Communications (INFOCOM). 2462--2470","author":"Tekin C.","key":"e_1_3_2_1_15_1"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2013.33"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2013.2263494"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2015.2395457"},{"key":"e_1_3_2_1_19_1","unstructured":"J. Yu and S. Mannor. 2011. Unimodal Bandits. In ICML.   J. Yu and S. Mannor. 2011. Unimodal Bandits. In ICML."},{"key":"e_1_3_2_1_20_1","first-page":"1","article-title":"Virtual Wireless User: A Practical Design for Parallel MultiConnect Using WiFi Direct in Group Communication","volume":"2013","author":"Zhanikeev M.","year":"2013","journal-title":"Mobile and Ubiquitous Systems: Computing, Networking, and Services. MobiQuitous"}],"event":{"name":"ACM TURC 2019: ACM Turing Celebration Conference - China","acronym":"ACM TURC 2019","location":"Chengdu China"},"container-title":["Proceedings of the ACM Turing Celebration Conference - China"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3321408.3323078","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3321408.3323078","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:53:12Z","timestamp":1750204392000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3321408.3323078"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,17]]},"references-count":20,"alternative-id":["10.1145\/3321408.3323078","10.1145\/3321408"],"URL":"https:\/\/doi.org\/10.1145\/3321408.3323078","relation":{},"subject":[],"published":{"date-parts":[[2019,5,17]]},"assertion":[{"value":"2019-05-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}