{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:36:48Z","timestamp":1777653408316,"version":"3.51.4"},"reference-count":165,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2019YFB180003400"],"award-info":[{"award-number":["2019YFB180003400"]}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2019M650587"],"award-info":[{"award-number":["2019M650587"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61801036"],"award-info":[{"award-number":["61801036"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"publisher","award":["U1936217"],"award-info":[{"award-number":["U1936217"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"publisher","award":["61971267"],"award-info":[{"award-number":["61971267"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"publisher","award":["61972223"],"award-info":[{"award-number":["61972223"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"publisher","award":["61941117"],"award-info":[{"award-number":["61941117"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"publisher","award":["61861136003"],"award-info":[{"award-number":["61861136003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L182038"],"award-info":[{"award-number":["L182038"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017582","name":"Beijing National Research Center for Information Science and Technology","doi-asserted-by":"publisher","award":["20031887521"],"award-info":[{"award-number":["20031887521"]}],"id":[{"id":"10.13039\/501100017582","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004147","name":"Research Fund of Tsinghua University\u2014Tencent Joint Laboratory for Internet Innovation Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004147","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Internet Things J."],"published-print":{"date-parts":[[2021,1,1]]},"DOI":"10.1109\/jiot.2020.3025365","type":"journal-article","created":{"date-parts":[[2020,9,21]],"date-time":"2020-09-21T21:52:36Z","timestamp":1600725156000},"page":"85-111","source":"Crossref","is-referenced-by-count":38,"title":["Reinforcement Learning Meets Wireless Networks: A Layering Perspective"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5228-3984","authenticated-orcid":false,"given":"Yawen","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2399-2829","authenticated-orcid":false,"given":"Yu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7464-893X","authenticated-orcid":false,"given":"Ming","family":"Zeng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1505-674X","authenticated-orcid":false,"given":"Umber","family":"Saleem","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaoming","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2793-6696","authenticated-orcid":false,"given":"Xiangming","family":"Wen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Depeng","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6606-5822","authenticated-orcid":false,"given":"Zhu","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1388-7478","authenticated-orcid":false,"given":"Tao","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5617-1659","authenticated-orcid":false,"given":"Yong","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761663"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761888"},{"key":"ref33","first-page":"1008","article-title":"Actor&#x2013;critic algorithms","author":"konda","year":"2000","journal-title":"Proc NIPS"},{"key":"ref32","first-page":"1","article-title":"Deep recurrent Q-learning for partially observable MDPS","author":"hausknecht","year":"2015","journal-title":"Proc AAAI"},{"key":"ref31","first-page":"1","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"Proc AAAI"},{"key":"ref30","author":"watkins","year":"1989","journal-title":"Learning from delayed rewards"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2005.843547"},{"key":"ref36","author":"de vrieze","year":"2018","journal-title":"Cooperative multi-agent reinforcement learning for low-level wireless communication"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref34","first-page":"183","article-title":"Multi-agent reinforcement learning: An overview","author":"bu?oniu","year":"2010","journal-title":"Innovations in Multi-Agent Systems and Applications - 1"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.2200\/S00268ED1V01Y201005AIM009"},{"key":"ref29","volume":"37","author":"rummery","year":"1994","journal-title":"On-line Q-learning using connectionist systems"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2014.2352118"},{"key":"ref22","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.001.1900196"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1287\/moor.12.2.262"},{"key":"ref23","author":"puterman","year":"2014","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2922668"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114726"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933893"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/BF02055574"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2019.2926715"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/JSYST.2019.2933536"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2957778"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1145\/3219752"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2913695"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933886"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2014.2387212"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2019.2924579"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240545"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.2981657"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2019.2955490"},{"key":"ref148","article-title":"Joint computing and caching in 5G-envisioned Internet of Vehicles: A deep reinforcement learning-based traffic control system","author":"ning","year":"2020","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1145\/3098822.3098843"},{"key":"ref59","doi-asserted-by":"crossref","first-page":"1204","DOI":"10.1109\/TMC.2017.2744620","article-title":"Intelligent spectrum management based on transfer actor&#x2013;critic learning for rateless transmissions in cognitive radio networks","volume":"17","author":"koushik","year":"2018","journal-title":"IEEE Trans Mobile Comput"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2961405"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2018.2829773"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2018.2879433"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TCCN.2018.2809722"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2945951"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933973"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.2981320"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/WCNCW.2018.8369007"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2017.2751641"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2477041"},{"key":"ref163","article-title":"Delay-aware VNF scheduling: A reinforcement learning approach with variable action set","author":"li","year":"2020","journal-title":"IEEE Trans Cogn Commun Netw"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2959182"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/GLOBECOM38437.2019.9013214"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2959181"},{"key":"ref4","author":"calabrese","year":"2016","journal-title":"Learning radio resource management in 5G networks Framework opportunities and challenges"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2014.6736747"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref5","first-page":"665","article-title":"Reinforcement learning","volume":"15","author":"barto","year":"1998","journal-title":"A Bradford Book"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2959263"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2014.2320099"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2018.8647699"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2012.100412.00017"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2930073"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2946797"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2016.2539923"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2020.2974958"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ITA.2018.8503086"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/LWC.2020.2969167"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2019.2961332"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2018.2802902"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.2979446"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8485876"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2018.2869244"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2960033"},{"key":"ref126","first-page":"279","article-title":"Automatic quality of experience management for WLAN networks using multi-armed bandit","author":"moura","year":"2019","journal-title":"Proc IFIP\/IEEE Symp Integr Netw Service Manag (IM)"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/ICNP.2019.8888034"},{"key":"ref124","first-page":"1","article-title":"A scalable SON coordination framework for 5G","author":"rojas","year":"2020","journal-title":"Proc IEEE\/IFIP Netw Oper Manage Symp (NOMS)"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2913162"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/GLOBECOM38437.2019.9014270"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2935201"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2015.2499271"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2972274"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2019.8885745"},{"key":"ref76","doi-asserted-by":"crossref","DOI":"10.1109\/TNSE.2020.2978856","article-title":"Deep reinforcement learning based resource management for multi-access edge computing in vehicular networks","author":"peng","year":"2020","journal-title":"IEEE Transactions on Network Science and Engineering"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761349"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2015.2480421"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2935450"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2019.2928811"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.2984038"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/SAHCN.2016.7733018"},{"key":"ref78","author":"chu","year":"2018","journal-title":"Reinforcement learning based multi-access control and battery prediction with energy harvesting in IoT systems"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1080\/01969720590897224"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.4018\/IJMCMC.2018100103"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2004.842418"},{"key":"ref136","doi-asserted-by":"crossref","first-page":"445","DOI":"10.1109\/TNSE.2018.2835758","article-title":"QTCP: Adaptive congestion control with reinforcement learning","volume":"6","author":"wei","year":"2019","journal-title":"IEEE Transactions on Network Science and Engineering"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/ICCW.2018.8403757"},{"key":"ref138","author":"jay","year":"2018","journal-title":"Internet congestion control via deep reinforcement learning"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1063\/1.5033831"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952524"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2904358"},{"key":"ref62","first-page":"6","article-title":"Cellular network traffic scheduling with deep reinforcement learning","author":"chinchali","year":"2018","journal-title":"Proc AAAI"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2015.7127653"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933887"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/TCCN.2019.2957224"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2019.8886332"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737649"},{"key":"ref66","article-title":"Smart resource allocation for mobile edge computing: A deep reinforcement learning approach","author":"wang","year":"2019","journal-title":"IEEE Trans Emerg Topics Comput Intell"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933761"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2971323"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2959187"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2020.2969148"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2904350"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2019.1800386"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2018.2818468"},{"key":"ref2","year":"2017","journal-title":"Global Mobile Data Traffic Forecast Update 2016&#x2013;2021"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2787979"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2016.2532458"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2017.1600951"},{"key":"ref95","author":"ye","year":"2018","journal-title":"DRAG Deep reinforcement learning based base station activation in heterogeneous networks"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2016.2538231"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2017.7997286"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2018.2844243"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2846694"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2403395"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/JSYST.2019.2891520"},{"key":"ref105","author":"wang","year":"2015","journal-title":"Dueling network architectures for deep reinforcement learning"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/TBC.2020.2977577"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOMW.2019.8845211"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2019.2937079"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2020.3001227"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2959185"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2019.2932126"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933968"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOMW.2019.8845316"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2017.2762668"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.2965927"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2015.7127696"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2017.2776917"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2727878"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2017.2712560"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2846401"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2904897"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2916583"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MVT.2019.2919236"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2012.02.119"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2933417"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2988367"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2018.8647611"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2011.102"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2965856"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2019.2913871"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2964534"},{"key":"ref119","doi-asserted-by":"crossref","first-page":"796","DOI":"10.1109\/TMC.2010.28","article-title":"QELAR: A machine-learning-based adaptive routing protocol for energy-efficient and lifetime-extended underwater sensor networks","volume":"9","author":"hu","year":"2010","journal-title":"IEEE Trans Mobile Comput"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/LWC.2019.2904486"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2970550"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/WIOPT.2007.4480049"},{"key":"ref113","first-page":"240","article-title":"Mobilized ad-hoc networks: A reinforcement learning approach","author":"chang","year":"2004","journal-title":"Proc IEEE ICAC"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/GLOBECOM38437.2019.9013751"},{"key":"ref116","doi-asserted-by":"crossref","first-page":"1494","DOI":"10.1109\/TNET.2008.2011644","article-title":"Optimal stochastic policies for distributed data aggregation in wireless sensor networks","volume":"17","author":"ye","year":"2009","journal-title":"IEEE\/ACM Trans Netw"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/CIT.2006.34"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ICTC46691.2019.8939987"},{"key":"ref89","article-title":"Non-uniform time-step deep Q-network for carrier-sense multiple access in heterogeneous wireless networks","author":"yu","year":"2020","journal-title":"IEEE Trans Mobile Comput"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/PIMRC.2017.8292257"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/VTCSpring.2018.8417683"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2016.2571695"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICCW.2019.8756797"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/TCCN.2020.2982895"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.2984227"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/WCNCW.2019.8902705"}],"container-title":["IEEE Internet of Things Journal"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6488907\/9302559\/09201129.pdf?arnumber=9201129","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:53:38Z","timestamp":1652194418000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9201129\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,1]]},"references-count":165,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/jiot.2020.3025365","relation":{},"ISSN":["2327-4662","2372-2541"],"issn-type":[{"value":"2327-4662","type":"electronic"},{"value":"2372-2541","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1,1]]}}}