{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:57:07Z","timestamp":1776113827750,"version":"3.50.1"},"reference-count":172,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Serv. Comput."],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1109\/tsc.2026.3658306","type":"journal-article","created":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T20:59:51Z","timestamp":1769633991000},"page":"1708-1727","source":"Crossref","is-referenced-by-count":0,"title":["Transfer Reinforcement Learning for Resource Allocation in Mobile Edge Computing Systems: A Survey"],"prefix":"10.1109","volume":"19","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3528-968X","authenticated-orcid":false,"given":"Chenhao","family":"Ni","sequence":"first","affiliation":[{"name":"Zhejiang Key Laboratory of Visual Information Intelligent Processing, College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2698-3319","authenticated-orcid":false,"given":"Xiangjie","family":"Kong","sequence":"additional","affiliation":[{"name":"Zhejiang Key Laboratory of Visual Information Intelligent Processing, College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1776-9628","authenticated-orcid":false,"given":"Jiaxin","family":"Du","sequence":"additional","affiliation":[{"name":"Zhejiang Key Laboratory of Visual Information Intelligent Processing, College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1064-1250","authenticated-orcid":false,"given":"Guojiang","family":"Shen","sequence":"additional","affiliation":[{"name":"Zhejiang Key Laboratory of Visual Information Intelligent Processing, College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0681-5393","authenticated-orcid":false,"given":"Wail","family":"Al-Asad","sequence":"additional","affiliation":[{"name":"Zhejiang Key Laboratory of Visual Information Intelligent Processing, College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1395-7314","authenticated-orcid":false,"given":"Geyong","family":"Min","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Faculty of Environment, Science and Economy, University of Exeter, Exeter, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2017.09.020"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.3020854"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3255923"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2023.3241430"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2022.3155162"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3365293"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.52202\/079017-4331"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2023.03.029"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2020.3042087"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2023.3337051"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysarc.2023.103048"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2024.3478768"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2022.3174475"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3004555"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.11396"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3292075"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2019.2918951"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3604933"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2024.3416309"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341260"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2021.3129785"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2022.3208457"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2021.3106401"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3626566"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3119950"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2024.3384132"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3658671"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16979"},{"key":"ref29","first-page":"9744","article-title":"Structured world belief for reinforcement learning in POMDP","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Singh","year":"2021"},{"key":"ref30","first-page":"19805","article-title":"Model-based reinforcement learning for semi-Markov decision processes with neural odes","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"33","author":"Du","year":"2020"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref34","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang","year":"2016"},{"key":"ref35","first-page":"449","article-title":"A distributional perspective on reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Bellemare","year":"2017"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref37","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Duan","year":"2016"},{"key":"ref38","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"37","author":"Schulman","year":"2015"},{"key":"ref39","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref40","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"12","author":"Sutton","year":"1999"},{"key":"ref41","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"48","author":"Mnih","year":"2016"},{"key":"ref42","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref43","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Silver","year":"2014"},{"key":"ref44","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Lillicrap","year":"2016"},{"key":"ref45","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2018"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref47","first-page":"2252","article-title":"Learning multiagent communication with backpropagation","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"29","author":"Sukhbaatar","year":"2016"},{"key":"ref48","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"30","author":"Lowe","year":"2017"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2023.103905"},{"issue":"178","key":"ref50","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.65109\/JSRC7365"},{"key":"ref52","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"99","author":"Ng","year":"1999"},{"key":"ref53","first-page":"792","article-title":"Principled methods for advising reinforcement learning agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wiewiora","year":"2003"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.65109\/JJTT8551"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9628"},{"key":"ref56","first-page":"15931","article-title":"Learning to utilize shaping rewards: A new approach of reward shaping","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"33","author":"Hu","year":"2020"},{"key":"ref57","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Finn","year":"2017"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17337"},{"key":"ref59","first-page":"1","article-title":"Centralized reward agent for knowledge sharing and transfer in multi-task reinforcement learning","volume-title":"Proc. 39th Annu. Conf. Neural Inform. Process. Syst.","author":"Ma","year":"2025"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/1160633.1160762"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/s13748-012-0026-6"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11718"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892798"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-016-5547-y"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.65109\/gylq9408"},{"key":"ref66","first-page":"52","article-title":"Learning against sequential opponents in repeated stochastic games","volume-title":"Proc. Multi-Disciplinary Conf. Reinforcement Learn. Decis. Mak.","volume":"25","author":"Hernandez-Leal","year":"2017"},{"key":"ref67","first-page":"962","article-title":"A deep Bayesian policy reuse approach against non-stationary agents","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"31","author":"Zheng","year":"2018"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-020-09480-9"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.65109\/ukrx3491"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/428"},{"key":"ref71","first-page":"27537","article-title":"CUP: Critic-guided policy reuse","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"35","author":"Zhang","year":"2022"},{"key":"ref72","first-page":"1","article-title":"Distilling the knowledge in a neural network","volume-title":"Proc. Deep Learn. Representation Learn. Workshop Conjun. NIPS","author":"Hinton","year":"2014"},{"key":"ref73","first-page":"1","article-title":"Policy distillation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Rusu","year":"2016"},{"key":"ref74","first-page":"1331","article-title":"Distilling policy distillation","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Czarnecki","year":"2019"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/435"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967849"},{"key":"ref77","first-page":"1","article-title":"First-order Meta-learned initialization for faster adaptation in deep reinforcement learning","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Biswas","year":"2018"},{"key":"ref78","article-title":"On first-order meta-learning algorithms","author":"Nichol","year":"2018"},{"key":"ref79","first-page":"5307","article-title":"Meta-reinforcement learning of structured exploration strategies","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"31","author":"Gupta","year":"2018"},{"key":"ref80","first-page":"7343","article-title":"Bayesian model-agnostic meta-learning","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"31","author":"Yoon","year":"2018"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636628"},{"key":"ref82","article-title":"$RL^{2}$RL2: Fast reinforcement learning via slow reinforcement learning","author":"Duan","year":"2016"},{"key":"ref83","first-page":"795","article-title":"Long short-term memory and learning-to-learn in networks of spiking neurons","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"31","author":"Bellec","year":"2018"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0569"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981250"},{"key":"ref86","first-page":"1","article-title":"Rapid task-solving in novel environments","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ritter","year":"2021"},{"key":"ref87","first-page":"24631","article-title":"Prompting decision transformer for few-shot policy generalization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu","year":"2022"},{"key":"ref88","first-page":"1478","article-title":"Hypernetworks in meta-reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Beck","year":"2023"},{"key":"ref89","first-page":"9660","article-title":"A modern self-referential weight matrix that learns to modify itself","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Irie","year":"2022"},{"key":"ref90","first-page":"1040","article-title":"Learning from demonstration","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"9","author":"Schaal","year":"1996"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/1329125.1329407"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1145\/1349822.1349852"},{"key":"ref93","first-page":"428","article-title":"Active imitation learning via reduction to IID active learning","volume-title":"Proc. Conf. Uncertainty Artif. Intell.","author":"Judah","year":"2012"},{"issue":"1","key":"ref94","first-page":"3925","article-title":"Active lmitation learning: Formal and practical reductions to IID learning","volume":"15","author":"Judah","year":"2014","journal-title":"J. Mach. Learn. Res."},{"key":"ref95","first-page":"661","article-title":"Efficient reductions for imitation learning","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Ross","year":"2010"},{"key":"ref96","first-page":"3309","article-title":"Deeply aggrevated: Differentiable imitation learning for sequential prediction","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sun","year":"2017"},{"key":"ref97","first-page":"6237","article-title":"An imitation learning approach for cache replacement","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu","year":"2020"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2021.3053136"},{"key":"ref99","first-page":"180","article-title":"On the sample complexity of stability constrained imitation learning","volume-title":"Proc. Learn. Dyn. Control Conf.","author":"Tu","year":"2022"},{"key":"ref100","article-title":"Reinforcement and imitation learning via interactive no-regret learning","author":"Ross","year":"2014"},{"key":"ref101","first-page":"66","article-title":"Learning by cheating","volume-title":"Proc. Conf. Robot Learn.","author":"Chen","year":"2020"},{"key":"ref102","first-page":"4572","article-title":"Generative adversarial imitation learning","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"29","author":"Ho","year":"2016"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10857"},{"issue":"2","key":"ref104","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"1","author":"Ng","year":"2000"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561411"},{"key":"ref107","first-page":"1930","article-title":"Model-based inverse reinforcement learning from visual demonstrations","volume-title":"Proc. Conf. Robot Learn.","author":"Das","year":"2021"},{"issue":"9","key":"ref108","first-page":"2125","article-title":"Transfer learning via inter-task mappings for temporal difference learning","volume":"8","author":"Taylor","year":"2007","journal-title":"J. Mach. Learn. Res."},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1007\/11564096_40"},{"key":"ref110","first-page":"1","article-title":"Learning invariant feature spaces to transfer skills with reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Gupta","year":"2017"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143906"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2644615"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.65109\/rsvm8502"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390225"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/TCCN.2022.3204572"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2023.3258692"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.3044597"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2023.3336191"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2021.3132136"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2024.3400605"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2024.3431875"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3365557"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2022.3182034"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2025.3580347"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/TCC.2022.3192560"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2024.3360438"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2022.3225239"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM53939.2023.10228946"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2024.3444784"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2024.3478841"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2024.110247"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2022.3198074"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1007\/s10723-023-09730-6"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3392329"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3151201"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3300927"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.105930"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/GLOBECOM42002.2020.9348212"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3220981"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/TNSE.2023.3321764"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2025.3579748"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/TCE.2024.3368156"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3439696"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2021.3118446"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1145\/3715146"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2024.3411169"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3397796"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2024.3376256"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3232944"},{"key":"ref150","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. Artif. Intell. Statist.","author":"McMahan","year":"2017"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2022.106004"},{"key":"ref152","first-page":"27199","article-title":"Addressing negative transfer in diffusion models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Go","year":"2023"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01155"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2024.3352910"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1145\/3475991"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2021.3075439"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3324962"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3445177"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3112718"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3227873"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3314762"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3457538"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2024.3485697"},{"key":"ref164","first-page":"1","article-title":"Robust transfer of safety-constrained reinforcement learning agents","volume-title":"Proc. 13th Int. Conf. Learn. Representations","author":"Zubia","year":"2025"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3283523"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i15.29571"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3107375"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11127396"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.3233\/faia230291"},{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3123"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2021.3052606"},{"key":"ref172","article-title":"Transfer reinforcement learning in heterogeneous action spaces using subgoal mapping","author":"Sivakumar","year":"2024"}],"container-title":["IEEE Transactions on Services Computing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/4629386\/11479482\/11364305.pdf?arnumber=11364305","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:37:07Z","timestamp":1776109027000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11364305\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":172,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tsc.2026.3658306","relation":{},"ISSN":["1939-1374","2372-0204"],"issn-type":[{"value":"1939-1374","type":"electronic"},{"value":"2372-0204","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3]]}}}