{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:15:47Z","timestamp":1775229347003,"version":"3.50.1"},"reference-count":51,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"U.S. National Science Foundation","doi-asserted-by":"crossref","award":["CNS-2002902"],"award-info":[{"award-number":["CNS-2002902"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000001","name":"U.S. National Science Foundation","doi-asserted-by":"crossref","award":["CNS-2003131"],"award-info":[{"award-number":["CNS-2003131"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000001","name":"U.S. National Science Foundation","doi-asserted-by":"crossref","award":["ECCS-2029978"],"award-info":[{"award-number":["ECCS-2029978"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000001","name":"U.S. National Science Foundation","doi-asserted-by":"crossref","award":["ECCS-2030026"],"award-info":[{"award-number":["ECCS-2030026"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000001","name":"U.S. National Science Foundation","doi-asserted-by":"crossref","award":["ECCS-2143559"],"award-info":[{"award-number":["ECCS-2143559"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000001","name":"U.S. National Science Foundation","doi-asserted-by":"crossref","award":["SII-2132700"],"award-info":[{"award-number":["SII-2132700"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"crossref"}]},{"name":"U.S. NSF","award":["CNS-1956276"],"award-info":[{"award-number":["CNS-1956276"]}]},{"name":"U.S. NSF","award":["CNS-2003131"],"award-info":[{"award-number":["CNS-2003131"]}]},{"name":"U.S. NSF","award":["CNS-2030026"],"award-info":[{"award-number":["CNS-2030026"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Wireless Commun."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1109\/twc.2024.3395624","type":"journal-article","created":{"date-parts":[[2024,5,10]],"date-time":"2024-05-10T17:32:17Z","timestamp":1715362337000},"page":"12703-12716","source":"Crossref","is-referenced-by-count":18,"title":["Offline Reinforcement Learning for Wireless Network Optimization With Mixture Datasets"],"prefix":"10.1109","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9714-4291","authenticated-orcid":false,"given":"Kun","family":"Yang","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Virginia, Charlottesville, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2727-8251","authenticated-orcid":false,"given":"Chengshuai","family":"Shi","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Virginia, Charlottesville, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3148-4453","authenticated-orcid":false,"given":"Cong","family":"Shen","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Virginia, Charlottesville, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6009-864X","authenticated-orcid":false,"given":"Jing","family":"Yang","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, The Pennsylvania State University, State College, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5255-2681","authenticated-orcid":false,"given":"Shu-Ping","family":"Yeh","sequence":"additional","affiliation":[{"name":"Intel Corporation, Santa Clara, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6493-7710","authenticated-orcid":false,"given":"Jaroslaw J.","family":"Sydir","sequence":"additional","affiliation":[{"name":"Intel Corporation, Santa Clara, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IEEECONF59524.2023.10477008"},{"key":"ref2","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020","journal-title":"arXiv:2005.01643"},{"key":"ref3","volume-title":"Power control for a network of access points","author":"Nagaraja","year":"2017"},{"key":"ref4","volume-title":"Base station employing shared resources among antenna units","author":"Valliappan","year":"2016"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2018.2798164"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2893168"},{"key":"ref7","article-title":"A deep Q-learning method for downlink power allocation in multi-cell networks","author":"Ishfaq Ahmed","year":"2019","journal-title":"arXiv:1904.13032"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761431"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2937438"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IEEECONF51394.2020.9443301"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOMWKSHPS50562.2020.9162925"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/GCWkshps52748.2021.9681985"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933973"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IEEECONF56349.2022.10051992"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2021.3051163"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/Allerton58177.2023.10313455"},{"key":"ref17","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","volume":"97","author":"Fujimoto"},{"key":"ref18","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kumar"},{"key":"ref19","article-title":"Offline reinforcement learning with implicit Q-learning","author":"Kostrikov","year":"2021","journal-title":"arXiv:2110.06169"},{"key":"ref20","first-page":"11702","article-title":"Bridging offline reinforcement learning and imitation learning: A tale of pessimism","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Rashidinejad"},{"key":"ref21","first-page":"27395","article-title":"Policy finetuning: Bridging sample-efficient offline and online reinforcement learning","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Xie"},{"key":"ref22","article-title":"When should we prefer offline reinforcement learning over behavioral cloning?","author":"Kumar","year":"2022","journal-title":"arXiv:2204.05618"},{"key":"ref23","first-page":"1","article-title":"Deep exploration via bootstrapped DQN","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Osband"},{"key":"ref24","article-title":"UCB exploration via Q-ensembles","author":"Chen","year":"2017","journal-title":"arXiv:1706.01502"},{"key":"ref25","first-page":"6131","article-title":"Sunrise: A simple unified framework for ensemble learning in deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee"},{"key":"ref26","first-page":"1","article-title":"Double Q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"23","author":"Hasselt"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref28","article-title":"Randomized ensembled double Q-learning: Learning fast without a model","author":"Chen","year":"2021","journal-title":"arXiv:2101.05982"},{"key":"ref29","first-page":"7436","article-title":"Uncertainty-based offline reinforcement learning with diversified Q-ensemble","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"34","author":"An"},{"key":"ref30","first-page":"7513","article-title":"Offline RL policies should be trained to be adaptive","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ghosh"},{"key":"ref31","article-title":"AWAC: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2020","journal-title":"arXiv:2006.09359"},{"key":"ref32","first-page":"35300","article-title":"Leveraging offline data in online reinforcement learning","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Wagenmaker"},{"key":"ref33","first-page":"1702","article-title":"Offline-to-online reinforcement learning via balanced replay and pessimistic Q-ensemble","volume-title":"Proc. Conf. Robot Learn.","author":"Lee"},{"key":"ref34","article-title":"Hybrid RL: Using both offline and online data can make RL efficient","author":"Song","year":"2022","journal-title":"arXiv:2210.06718"},{"key":"ref35","article-title":"Finetuning from offline reinforcement learning: Challenges, trade-offs and practical solutions","author":"Luo","year":"2023","journal-title":"arXiv:2303.17396"},{"key":"ref36","article-title":"Cal-QL: Calibrated offline RL pre-training for efficient online fine-tuning","author":"Nakamoto","year":"2023","journal-title":"arXiv:2303.05479"},{"key":"ref37","first-page":"1","article-title":"Fine-tuning offline policies with optimistic action selection","volume-title":"Proc. NIPS","author":"Mark"},{"key":"ref38","volume-title":"Simulation Assumptions and Parameters for FDD HeNB RF Requirements","year":"2009"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref40","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2014.2328102"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1177\/1471082X14561155"},{"key":"ref43","article-title":"Revisiting the minimalist approach to offline reinforcement learning","author":"Tarasov","year":"2023","journal-title":"arXiv:2305.09836"},{"key":"ref44","first-page":"19527","article-title":"Near-optimal conservative exploration in reinforcement learning under episode-wise constraints","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref45","article-title":"D4RL: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2021","journal-title":"arXiv:2004.07219"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1214\/23-aos2342"},{"key":"ref47","first-page":"5084","article-title":"Is pessimism provably efficient for offline RL?","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jin"},{"key":"ref48","first-page":"19967","article-title":"Pessimistic Q-learning for offline reinforcement learning: Towards optimal sample complexity","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Shi"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1287\/moor.2022.1309"},{"key":"ref50","article-title":"Advancing RAN slicing with offline reinforcement learning","author":"Yang","year":"2023","journal-title":"arXiv:2312.10547"},{"key":"ref51","first-page":"3909","article-title":"Q-transformer: Scalable offline reinforcement learning via autoregressive Q-functions","volume-title":"Proc. Conf. Robot Learn.","author":"Chebotar"}],"container-title":["IEEE Transactions on Wireless Communications"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/7693\/10713425\/10529190-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7693\/10713425\/10529190.pdf?arnumber=10529190","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T04:19:41Z","timestamp":1728620381000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10529190\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10]]},"references-count":51,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/twc.2024.3395624","relation":{},"ISSN":["1536-1276","1558-2248"],"issn-type":[{"value":"1536-1276","type":"print"},{"value":"1558-2248","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10]]}}}