{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T21:52:17Z","timestamp":1775253137856,"version":"3.50.1"},"reference-count":38,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100021171","name":"Guangdong Basic and Applied Basic Research Foundation","doi-asserted-by":"publisher","award":["2022A1515010374"],"award-info":[{"award-number":["2022A1515010374"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Hong Kong RGC Theme-based Research Scheme","award":["T43-513\/23-N"],"award-info":[{"award-number":["T43-513\/23-N"]}]},{"name":"HK RGC Collaborative Research Fund","award":["C5018-20G"],"award-info":[{"award-number":["C5018-20G"]}]},{"name":"HK RGC General Research Fund","award":["PolyU-15204921"],"award-info":[{"award-number":["PolyU-15204921"]}]},{"name":"HK RGC General Research Fund","award":["PolyU-15220922"],"award-info":[{"award-number":["PolyU-15220922"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput."],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1109\/tc.2024.3377912","type":"journal-article","created":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T18:15:22Z","timestamp":1710872122000},"page":"1603-1615","source":"Crossref","is-referenced-by-count":32,"title":["Decentralized Task Offloading in Edge Computing: An Offline-to-Online Reinforcement Learning Approach"],"prefix":"10.1109","volume":"73","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-8830-1650","authenticated-orcid":false,"given":"Hongcai","family":"Lin","sequence":"first","affiliation":[{"name":"School of Software Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8732-3675","authenticated-orcid":false,"given":"Lei","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Software Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5176-2692","authenticated-orcid":false,"given":"Hao","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Software Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2725-2529","authenticated-orcid":false,"given":"Jiannong","family":"Cao","sequence":"additional","affiliation":[{"name":"Department of Computing, Hong Kong Polytechnic University, Hong Kong, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TNSE.2021.3076795"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3023936"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3042599"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2018.2880874"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2018.2841758"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3019492"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2019.2934103"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2017.8057196"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2018.2863301"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/MSN.2018.000-1"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2970550"},{"key":"ref12","article-title":"Multi-agent reinforcement learning for networked system control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chu","year":"2020"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3046737"},{"key":"ref14","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","volume":"97","author":"Fujimoto","year":"2019"},{"key":"ref15","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Adv. Neural Inf. Proc. Syst.","volume":"33","author":"Kumar","year":"2020"},{"key":"ref16","first-page":"1702","article-title":"Offline-to-online reinforcement learning via balanced replay and pessimistic Q-ensemble","volume-title":"Proc. Conf. Robot Learn.","volume":"164","author":"Lee","year":"2021"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3042224"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3019492"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3067919"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.pmcj.2020.101221"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCC.2020.2968443"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/JSYST.2022.3188997"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3091508"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3009540"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2020.3036871"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT.2016.7541539"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2926979"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2982292"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOMWKSHPS50562.2020.9163048"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3115807"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM42981.2021.9488707"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref34","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","volume":"80","author":"Haarnoja","year":"2018"},{"key":"ref35","article-title":"Soft actor-critic for discrete action settings","author":"Christodoulou","year":"2019"},{"key":"ref36","article-title":"High-dimensional continuous control using generalized advantage estimation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Schulman","year":"2016"},{"key":"ref37","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/71.963420"}],"container-title":["IEEE Transactions on Computers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/12\/10527219\/10473221.pdf?arnumber=10473221","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,10]],"date-time":"2024-05-10T05:17:31Z","timestamp":1715318251000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10473221\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":38,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tc.2024.3377912","relation":{},"ISSN":["0018-9340","1557-9956","2326-3814"],"issn-type":[{"value":"0018-9340","type":"print"},{"value":"1557-9956","type":"electronic"},{"value":"2326-3814","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]}}}