{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T16:28:08Z","timestamp":1776184088708,"version":"3.50.1"},"reference-count":53,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Wireless Commun."],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1109\/twc.2021.3051163","type":"journal-article","created":{"date-parts":[[2021,1,20]],"date-time":"2021-01-20T20:57:04Z","timestamp":1611176224000},"page":"3507-3523","source":"Crossref","is-referenced-by-count":151,"title":["Resource Management in Wireless Networks via Multi-Agent Deep Reinforcement Learning"],"prefix":"10.1109","volume":"20","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4891-6726","authenticated-orcid":false,"given":"Navid","family":"Naderializadeh","sequence":"first","affiliation":[]},{"given":"Jaroslaw J.","family":"Sydir","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4040-6358","authenticated-orcid":false,"given":"Meryem","family":"Simsek","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6440-1450","authenticated-orcid":false,"given":"Hosein","family":"Nikopour","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"crossref","first-page":"1503","DOI":"10.1109\/TCOMM.2002.802562","article-title":"the simulation of independent rayleigh faders","volume":"50","author":"li","year":"2002","journal-title":"IEEE Transactions on Communications"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2015.2413412"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IWSLS.2016.7801582"},{"key":"ref32","first-page":"4295","article-title":"Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"rashid","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref31","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","author":"sunehag","year":"2018","journal-title":"Proc 17th Int Conf Auto Agents MultiAgent Syst"},{"key":"ref30","first-page":"1","article-title":"Counterfactual multi-agent policy gradients","author":"foerster","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2016.7588290"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35289-8_3"},{"key":"ref35","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"arXiv 1801 01290"},{"key":"ref34","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv 1509 02971"},{"key":"ref28","first-page":"1","article-title":"Learning to play guess who? 
and inventing a grounded language as a consequence","author":"jorge","year":"2016","journal-title":"Proc NIPS Workshop Deep Reinforcement Learn"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2013.13.120485"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.001.1900287"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/SPAWC48557.2020.9154250"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933962"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2017.7997440"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2959185"},{"key":"ref24","article-title":"A deep Q-Learning method for downlink power allocation in multi-cell networks","author":"ishfaq ahmed","year":"2019","journal-title":"arXiv 1904 13032"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761431"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2937438"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933973"},{"key":"ref50","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref51","first-page":"1","article-title":"The reactor: A fast and sample-efficient actor-critic agent for reinforcement learning","author":"gruslys","year":"2018","journal-title":"Proc 7th Int Conf Learn Represent (ICLR)"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref52","first-page":"1","article-title":"Recurrent experience replay in distributed reinforcement learning","author":"kapturowski","year":"2019","journal-title":"Int Conf Learn Represent (ICLR)"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2926625"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2015.2408342"},{"key":"ref12","article-title":"Move evaluation in go using deep convolutional neural networks","author":"maddison","year":"2014","journal-title":"arXiv 1412 6564"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref14","article-title":"Dota 2 with large scale deep reinforcement learning","year":"2019","journal-title":"arXiv 1912 06680"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/VTCFall.2017.8288417"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2013.060513.120959"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.3024629"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2831240"},{"key":"ref19","article-title":"Deep reinforcement learning for distributed uncoordinated cognitive radios resource allocation","author":"tondwalkar","year":"2019","journal-title":"arXiv 1911 
03366"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2011.2147784"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2008.070227"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2014.2328102"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2014.6845058"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT.2017.8006944"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.2015.7421354"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref9","article-title":"Machine learning for wireless networks with artificial intelligence: A tutorial on neural networks","author":"chen","year":"2017","journal-title":"arXiv 1710 02913"},{"key":"ref46","article-title":"Quantifying generalization in reinforcement learning","author":"cobbe","year":"2018","journal-title":"arXiv 1812 02341"},{"key":"ref45","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref48","article-title":"ZPD teaching strategies for deep reinforcement learning from demonstrations","author":"seita","year":"2019","journal-title":"arXiv 1910 12154"},{"key":"ref47","article-title":"On the power of curriculum learning in training deep networks","author":"hacohen","year":"2019","journal-title":"arXiv 1904 03626"},{"key":"ref42","first-page":"2681","article-title":"Deep decentralized multi-task multi-agent reinforcement learning under partial observability","volume":"70","author":"omidshafiei","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref41","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"Proc 13eth AAAI Conf Artif Intell"},{"key":"ref44","author":"dhariwal","year":"2017","journal-title":"OpenAI Baselines"},{"key":"ref43","article-title":"Learning to reinforcement learn","author":"wang","year":"2016","journal-title":"arXiv preprint arXiv 1611 05763"}],"container-title":["IEEE Transactions on Wireless Communications"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7693\/9450049\/09329087.pdf?arnumber=9329087","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,26]],"date-time":"2021-10-26T20:24:45Z","timestamp":1635279885000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9329087\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6]]},"references-count":53,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/twc.2021.3051163","relation":{},"ISSN":["1536-1276","1558-2248"],"issn-type":[{"value":"1536-1276","type":"print"},{"value":"1558-2248","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6]]}}}