{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:32:13Z","timestamp":1774539133607,"version":"3.50.1"},"reference-count":41,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61501202"],"award-info":[{"award-number":["61501202"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Internet Things J."],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1109\/jiot.2020.3002936","type":"journal-article","created":{"date-parts":[[2020,6,16]],"date-time":"2020-06-16T21:40:01Z","timestamp":1592343601000},"page":"9091-9102","source":"Crossref","is-referenced-by-count":27,"title":["Throughput Maximization by Deep Reinforcement Learning With Energy Cooperation for Renewable Ultradense IoT Networks"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8397-9546","authenticated-orcid":false,"given":"Ya","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3363-5139","authenticated-orcid":false,"given":"Xiaohui","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6632-0929","authenticated-orcid":false,"given":"Hui","family":"Liang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2911544"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2019.09.016"},{"key":"ref33","author":"boyd","year":"2014","journal-title":"Convex optimization"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2014.2328154"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2016.2520249"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2015.2507781"},{"key":"ref37","author":"peng","year":"2017","journal-title":"Multiagent bidirectionally-coordinated nets for learning to play StarCraft combat games"},{"key":"ref36","first-page":"1","article-title":"Coordinated multi-robot exploration under communication constraints using decentralized Markov decision processes","author":"matignon","year":"2012","journal-title":"Proc 26th AAAI Conf Artif Intell"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref34","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2018.1700490"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/NRSC.2012.6208550"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/WCL.2013.081913.130391"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2014.6757901"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2016.2520244"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2016.2633723"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2017.2720898"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2014.2357430"},{"key":"ref17","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ISSNIP.2007.4496871"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2902371"},{"key":"ref28","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2439636"},{"key":"ref27","first-page":"1","article-title":"Deep deterministic policy gradient based dynamic power control for self-powered ultradense networks","author":"li","year":"2018","journal-title":"Proc IEEE GLOBECOM Workshops (GC Wkshps)"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2875535"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2907871"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2481722"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2016.2571730"},{"key":"ref8","first-page":"1","article-title":"Mobile and wireless communications system for 2020 and beyond (5G)","author":"osseiran","year":"2014","journal-title":"Proc ITU-R Vis Workshop"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2838584"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2014.2312291"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2015.7010535"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2812803"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2707140"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"2019","DOI":"10.3390\/s18072119","article-title":"A kind of joint routing and resource allocation scheme based on prioritized memories-deep Q network for cognitive radio ad hoc networks","volume":"18","author":"du","year":"2018","journal-title":"SENSORS"},{"key":"ref21","author":"mnih","year":"2013","journal-title":"Playing atari with deep reinforcement learning"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2872440"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1049\/iet-com.2010.0985"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2831240"},{"key":"ref26","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2017.2712560"}],"container-title":["IEEE Internet of Things Journal"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6488907\/9197748\/09119115.pdf?arnumber=9119115","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T17:26:57Z","timestamp":1651080417000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9119115\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9]]},"references-count":41,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/jiot.2020.3002936","relation":{},"ISSN":["2327-4662","2372-2541"],"issn-type":[{"value":"2327-4662","type":"electronic"},{"value":"2372-2541","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,9]]}}}