{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T03:47:36Z","timestamp":1775447256198,"version":"3.50.1"},"reference-count":186,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Plan under","award":["2018YFB1003800"],"award-info":[{"award-number":["2018YFB1003800"]}]},{"name":"Macao Science and Technology Development Fund through Macao Funding Scheme for Key Research and Development Projects","award":["0025\/2019\/AKP"],"award-info":[{"award-number":["0025\/2019\/AKP"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61802450"],"award-info":[{"award-number":["61802450"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003453","name":"Natural Science Foundation of Guangdong","doi-asserted-by":"publisher","award":["2018A030313005"],"award-info":[{"award-number":["2018A030313005"]}],"id":[{"id":"10.13039\/501100003453","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Program for Guangdong Introducing Innovative and Entrepreneurial Teams","award":["2017ZT07X355"],"award-info":[{"award-number":["2017ZT07X355"]}]},{"DOI":"10.13039\/100016691","name":"Guangdong Provincial Pearl River Talents Program","doi-asserted-by":"publisher","award":["2019QN01X130"],"award-info":[{"award-number":["2019QN01X130"]}],"id":[{"id":"10.13039\/100016691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Commun. Surv. Tutorials"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/comst.2021.3073036","type":"journal-article","created":{"date-parts":[[2021,4,13]],"date-time":"2021-04-13T23:53:59Z","timestamp":1618358039000},"page":"1659-1692","source":"Crossref","is-referenced-by-count":269,"title":["Deep Reinforcement Learning for Internet of Things: A Comprehensive Survey"],"prefix":"10.1109","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4430-7904","authenticated-orcid":false,"given":"Wuhui","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9322-9060","authenticated-orcid":false,"given":"Xiaoyu","family":"Qiu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0245-333X","authenticated-orcid":false,"given":"Ting","family":"Cai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6165-4196","authenticated-orcid":false,"given":"Hong-Ning","family":"Dai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7872-7718","authenticated-orcid":false,"given":"Zibin","family":"Zheng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8561-5092","authenticated-orcid":false,"given":"Yan","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1145\/2658994"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2018.2890203"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2019.2897805"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2017.2652484"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.14569\/IJACSA.2017.080757"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2904303"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/ICCS.2018.8689228"},{"key":"ref178","first-page":"9927","article-title":"Multi-agent common knowledge reinforcement learning","author":"de witt","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2018.2878025"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761206"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1145\/1323293.1294267"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2019.2903756"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2014.2363844"},{"key":"ref33","author":"xiao","year":"2018","journal-title":"Reinforcement learning-based energy trading for microgrids"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2921475"},{"key":"ref31","author":"wood","year":"2013","journal-title":"Power Generation Operation and Control"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3002936"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2014.2357079"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2801880"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2018.2823641"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2992117"},{"key":"ref181","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garc\u00eda","year":"2015","journal-title":"J Mach Learn Res"},{"key":"ref180","author":"nagabandi","year":"2018","journal-title":"Deep online learning via meta-learning Continual adaptation for model-based RL"},{"key":"ref185","author":"fox","year":"2015","journal-title":"Taming the Noise in Reinforcement Learning via Soft Updates"},{"key":"ref184","article-title":"Prioritized experience replay","author":"schaul","year":"2016","journal-title":"Proc 4th Int Conf Learn Represent"},{"key":"ref183","first-page":"1","article-title":"Distilling the knowledge in a neural network","author":"hinton","year":"2015","journal-title":"Proc NIPS Deep Learn Represent Learn Workshop"},{"key":"ref182","author":"tamar","year":"2013","journal-title":"Scaling up robust MDPs by reinforcement learning"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/IAT.2007.94"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.3390\/en11082010"},{"key":"ref27","author":"lowe","year":"2017","journal-title":"Multi-agent actor-critic for mixed cooperative-competitive environments"},{"key":"ref179","first-page":"13566","article-title":"RUDDER: Return decomposition for delayed rewards","author":"arjona-medina","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2899673"},{"key":"ref20","author":"mnih","year":"2016","journal-title":"Asynchronous methods for deep reinforcement learning"},{"key":"ref22","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref21","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"Proc ICML"},{"key":"ref24","author":"kulkarni","year":"2016","journal-title":"Hierarchical Deep Reinforcement Learning Integrating Temporal Abstraction and Intrinsic Motivation"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2918071"},{"key":"ref26","author":"yang","year":"2018","journal-title":"Mean field multi-agent reinforcement learning"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2904619"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2977374"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2902371"},{"key":"ref51","first-page":"766","article-title":"Cellular network traffic scheduling with deep reinforcement learning","author":"chinchali","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2017.2737968"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2018.1700442"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2016.2566339"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2018.1700468"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1145\/3234463"},{"key":"ref152","author":"ferdowsi","year":"2017","journal-title":"Deep learning-based dynamic watermarking for secure signal authentication in the Internet of Things"},{"key":"ref151","author":"mismar","year":"2017","journal-title":"Deep Q-learning for self-organizing networks fault management and radio performance improvement"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2019.2938509"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2018.00164"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1145\/3131671"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2924184"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2012.2186810"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2016.2607701"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1186\/s13638-017-1018-9"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/IWCMC.2018.8450432"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2921159"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2912996"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-94268-1_40"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TCCN.2018.2809722"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2018.03.072"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2871394"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2019.1800286"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2924015"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2020.2995371"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2019.1800376"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683228"},{"key":"ref161","author":"gao","year":"2017","journal-title":"Multi-agent Q-learning aided backpressure routing algorithm for delay reduction"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1504\/IJSNET.2015.067591"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MCI.2019.2937608"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2019.2928811"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2844341"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2927227"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2016.7422412"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2019.2925719"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2019.2907718"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2018.09.017"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.3390\/s19040970"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2916583"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2017.2753408"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1111\/1467-8640.t01-1-00206"},{"key":"ref48","first-page":"2136","article-title":"Recurrent deep multiagent Q-learning for autonomous agents in future smart grid","author":"yang","year":"2018","journal-title":"Proc 17th Int Conf Auton Agents MultiAgent Syst"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2016.12.008"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCW.2018.8403783"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2018.12.061"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2016.2539300"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2015.2495145"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-04239-4_4"},{"key":"ref126","author":"schoettler","year":"2019","journal-title":"Deep Reinforcement Learning for Industrial Insertion Tasks with Visual Inputs and Natural Rewards"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2017.03.095"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1007\/s10916-018-1045-z"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2013.09.008"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2887282"},{"key":"ref129","author":"zheng","year":"2019","journal-title":"Manufacturing dispatching using reinforcement and transfer learning"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2946797"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/INDIN.2017.8104770"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.001.1900097"},{"key":"ref76","author":"challita","year":"2018","journal-title":"Cellular-connected UAVs over 5G Deep reinforcement learning for interference management"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1145\/3005745.3005750"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2900035"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2890686"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2897134"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/WoWMoM.2019.8792967"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2016.2647624"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2016.2517084"},{"key":"ref78","author":"gallicchio","year":"2017","journal-title":"Deep echo state network (DeepESN) A brief survey"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/ICWISE.2018.8633291"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2017.2785414"},{"key":"ref136","author":"stampa","year":"2017","journal-title":"A Deep-Reinforcement Learning Approach for Software-Defined Networking Routing Optimization"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1007\/s11276-017-1632-9"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2903197"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/CISP-BMEI.2017.8302157"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2885530"},{"key":"ref139","article-title":"Inferring social network structure using mobile phone data","volume":"106","author":"eagle","year":"2007","journal-title":"Proc Nat Acad Sci"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICSGCE.2018.8556775"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2019.2941134"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/MED.2018.8442695"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2878570"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2016.7402272"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/j.jesit.2018.01.001"},{"key":"ref141","author":"zhan","year":"2019","journal-title":"Crowdsensing game with demand uncertainties A deep reinforcement learning approach"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2790704"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.2307\/2977633"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2016.2622180"},{"key":"ref143","author":"chen","year":"2018","journal-title":"IntelligentCrowd Mobile crowdsensing via multi-agent reinforcement learning"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2793186"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2019.2927314"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2018.1701310"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2912673"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2017.2783439"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2444095"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2018.2847721"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2901791"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2871020"},{"key":"ref94","author":"zhang","year":"2018","journal-title":"Partially observable reinforcement learning for intelligent transportation systems"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCBDA.2019.8725683"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2980198"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2017.7997286"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2878435"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2014.2300753"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/3132211.3134448"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2904353"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/SEC.2016.17"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2909109"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569448"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2017.2759728"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2926979"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.3390\/s19061395"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2019.2908171"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2017.2687620"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2017.8317730"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2018.2864373"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2988367"},{"key":"ref11","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1512\/iumj.1957.6.56038"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2019.2902563"},{"key":"ref16","article-title":"On-line Q-learning using connectionist systems","author":"rummery","year":"1994"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2856854"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2913162"},{"key":"ref17","author":"van hasselt","year":"2015","journal-title":"Deep reinforcement learning with double q-learning"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2789466"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2017.2760281"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/COASE.2019.8843338"},{"key":"ref19","article-title":"Actor-critic algorithms","author":"konda","year":"2002"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2996213"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2018.8647178"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2019.2902661"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/VTCFall.2018.8690980"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2912420"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-22971-9_37"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-016-1237-7"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CFEC.2019.8733153"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-013-0864-5"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-012-0711-0"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569635"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2017.1700246"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3004394"},{"key":"ref87","author":"dulac-arnold","year":"2015","journal-title":"Deep reinforcement learning in large discrete action spaces"},{"key":"ref88","first-page":"3562","article-title":"Learn what not to learn: Action elimination with deep reinforcement learning","author":"zahavy","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"}],"container-title":["IEEE Communications Surveys &amp; Tutorials"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9739\/9520094\/09403369.pdf?arnumber=9403369","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:52:24Z","timestamp":1652194344000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9403369\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":186,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/comst.2021.3073036","relation":{},"ISSN":["1553-877X","2373-745X"],"issn-type":[{"value":"1553-877X","type":"electronic"},{"value":"2373-745X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}