{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T16:00:16Z","timestamp":1776441616222,"version":"3.51.2"},"reference-count":289,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62401161"],"award-info":[{"award-number":["62401161"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62301336"],"award-info":[{"award-number":["62301336"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62322106"],"award-info":[{"award-number":["62322106"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62471151"],"award-info":[{"award-number":["62471151"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2022A1515110056"],"award-info":[{"award-number":["2022A1515110056"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Program of Guangzhou","award":["2025A04J3846"],"award-info":[{"award-number":["2025A04J3846"]}]},{"name":"Longgang District Shenzhen&#x0027;s Ten Action Plan for Supporting Innovation Projects"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Netw. Sci. Eng."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1109\/tnse.2025.3572362","type":"journal-article","created":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T14:09:19Z","timestamp":1747922959000},"page":"4498-4530","source":"Crossref","is-referenced-by-count":5,"title":["Rethinking Knowledge Distillation in Collaborative Machine Learning: Memory, Knowledge, and Their Interactions"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2246-7640","authenticated-orcid":false,"given":"Pengchao","family":"Han","sequence":"first","affiliation":[{"name":"School of Information Engineering, Guangdong University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3391-6675","authenticated-orcid":false,"given":"Xi","family":"Huang","sequence":"additional","affiliation":[{"name":"Around Tech Company Ltd., Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1538-249X","authenticated-orcid":false,"given":"Yi","family":"Fang","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Guangdong University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2480-8066","authenticated-orcid":false,"given":"Guojun","family":"Han","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Guangdong University of Technology, Guangzhou, China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"GPT-4 technical report","author":"Achiam","year":"2023"},{"key":"ref2","article-title":"BERT: A review of applications in natural language processing and understanding","author":"Koroteev","year":"2021"},{"key":"ref3","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. Artif. Intell. Statist.","author":"McMahan","year":"2017"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-71704-9_65"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-022-1398-0"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00992"},{"key":"ref7","article-title":"Federated multi-task learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Smith","year":"2017"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3502223.3502233"},{"key":"ref9","article-title":"Distilling the knowledge in a neural network","author":"Hinton","year":"2015"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/s41565-020-0655-z"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/s41928-023-01053-4"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/s44287-023-00002-9"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/356580.356581"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-neuro-061010-113720"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-psych-010419-051101"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2024.06.006"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.visres.2016.09.010"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1126\/science.aaw4325"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuron.2020.02.024"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1038\/s41593-023-01382-9"},{"key":"ref21","first-page":"1560","article-title":"Birth of a transformer: A memory viewpoint","volume-title":"Proc. 37th Int. Conf. Neural Inf. Process. Syst.","author":"Bietti","year":"2024"},{"key":"ref22","first-page":"14637","article-title":"Transformer as a hippocampal memory consolidation model based on NMDAR-inspired nonlinearity","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Kim","year":"2023"},{"key":"ref23","article-title":"Knowledge circuits in pretrained transformers","author":"Yao","year":"2024"},{"key":"ref24","article-title":"Linking in-context learning in transformers to human episodic memory","author":"Ji-An","year":"2024"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29946"},{"key":"ref26","first-page":"74530","article-title":"Augmenting language models with long-term memory","volume-title":"Proc. 37th Int. Conf. Neural Inf. Process. Syst.","author":"Wang","year":"2024"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.4208\/jml.240708"},{"key":"ref28","first-page":"27132","article-title":"Towards understanding ensemble distillation in federated learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Park","year":"2023"},{"key":"ref29","article-title":"Towards understanding ensemble, knowledge distillation and self-distillation in deep learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Allen-Zhu","year":"2023"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00497"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3103973"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01503"},{"key":"ref33","article-title":"Knowledge distillation for federated learning: A practical guide","author":"Mora","year":"2022"},{"key":"ref34","article-title":"Knowledge distillation in federated edge learning: A survey","author":"Wu","year":"2024","journal-title":"Authorea Preprints"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2024.3428519"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2024.3423319"},{"key":"ref37","first-page":"2654","article-title":"Do deep nets really need to be deep?","volume-title":"Proc. 28th Int. Conf. Neural Inf. Process. Syst.","author":"Ba","year":"2014"},{"key":"ref38","first-page":"98","article-title":"Knowledge acquisition from examples via multiple models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Domingos","year":"1997"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150464"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01955"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2024.3431516"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00336"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3321480"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS56603.2022.00107"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671514"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3055564"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3152732"},{"key":"ref49","article-title":"Data-free knowledge transfer: A survey","author":"Liu","year":"2021"},{"key":"ref50","article-title":"On the opportunities and risks of foundation models","author":"Bommasani","year":"2021"},{"key":"ref51","first-page":"22300","article-title":"Revisiting neural scaling laws in language and vision","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Alabdulmohsin","year":"2022"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2311878121"},{"key":"ref53","first-page":"28699","article-title":"The quantization model of neural scaling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Michaud","year":"2024"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.016.2300600"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.005.2400019"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.001.2300550"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2024.3436670"},{"key":"ref58","first-page":"11037","article-title":"What knowledge gets distilled in knowledge distillation?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Ojha","year":"2023"},{"key":"ref59","first-page":"55565","article-title":"Are emergent abilities of large language models a mirage?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Schaeffer","year":"2024"},{"key":"ref60","first-page":"10421","article-title":"Specializing smaller language models towards multi-step reasoning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fu","year":"2023"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645671"},{"key":"ref62","first-page":"24872","article-title":"DISTILLM: Towards streamlined distillation for large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ko","year":"2024"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.145"},{"key":"ref65","article-title":"TinyLLM: Learning a small student from multiple large language models","author":"Tian","year":"2024"},{"key":"ref66","first-page":"7741","article-title":"Neural attentive circuits","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Weiss","year":"2022"},{"key":"ref67","article-title":"What do the circuits mean? A knowledge edit view","author":"Ge","year":"2024"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.581"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.27965"},{"key":"ref70","article-title":"What does the knowledge neuron thesis have to do with knowledge?","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Niu","year":"2024"},{"key":"ref71","article-title":"Circuit component reuse across tasks in transformer language models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Merullo","year":"2024"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.522"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1228"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21294"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/BigData59044.2023.10386968"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3287837"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00454"},{"key":"ref78","article-title":"Reconciling utility and membership privacy via knowledge distillation","author":"Shejwalkar","year":"2019"},{"key":"ref79","article-title":"Distilling the knowledge in a neural network","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Hinton","year":"2014"},{"key":"ref80","article-title":"Large scale distributed neural network training through online distillation","author":"Anil","year":"2018"},{"key":"ref81","article-title":"N2n learning: Network to network compression via policy gradient reinforcement learning","author":"Ashok","year":"2017"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86486-6_6"},{"key":"ref83","article-title":"Collaborative deep reinforcement learning","author":"Lin","year":"2017"},{"key":"ref84","article-title":"Contrastive representation distillation","author":"Tian","year":"2019"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00938"},{"key":"ref86","article-title":"Understanding and improving knowledge distillation","author":"Tang","year":"2020"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01294"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-3264-1"},{"key":"ref90","article-title":"Unifying distillation and privileged information","author":"Lopez-Paz","year":"2015"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"ref92","first-page":"7632","article-title":"A statistical perspective on distillation","volume-title":"Proc. 38th Int. Conf. Mach. Learn., Ser. Proc. Mach. Learn. Res.","volume":"139","author":"Menon","year":"2021"},{"key":"ref93","first-page":"2351","article-title":"Ensemble distillation for robust model fusion in federated learning","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Lin","year":"2021"},{"key":"ref94","article-title":"Fitnets: Hints for thin deep nets","author":"Romero","year":"2014"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892595"},{"key":"ref96","article-title":"Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer","author":"Zagoruyko","year":"2016"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00409"},{"key":"ref98","first-page":"65299","article-title":"Knowledge diffusion for distillation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Huang","year":"2023"},{"key":"ref99","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Ho","year":"2020"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00201"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1017\/9781108966559.019"},{"key":"ref102","article-title":"Fedmd: Heterogenous federated learning via model distillation","volume-title":"Proc. NeurIPS Workshop Federated Learn. Data Privacy Confidentiality","author":"Li","year":"2019"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6997"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2021.3119385"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/303"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00152"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2022.105581"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3288692"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3300470"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01563"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01637"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953072"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01930"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2883743"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00776"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01214"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p19-1441"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611886"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412005"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8802909"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095014"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01184"},{"key":"ref123","article-title":"Open-vocabulary object detection via vision and language knowledge distillation","author":"Gu","year":"2021"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01467"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2020.2986962"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00644"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01088"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1037\/h0027277"},{"key":"ref129","article-title":"Communication-efficient on-device machine learning: Federated distillation and augmentation under non-IID private data","volume-title":"Proc. NeurIPS Workshop Mach. Learn. Phone Other Consum. Devices","author":"Jeong","year":"2018"},{"key":"ref130","article-title":"Distilling on-device intelligence at the network edge","author":"Park","year":"2019"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS54860.2022.00094"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00993"},{"key":"ref133","article-title":"Federated class-incremental learning with prompting","author":"Liu","year":"2023"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/LCN60385.2024.10639809"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1561\/2200000083"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2021.3070013"},{"key":"ref137","article-title":"Cronus: Robust and heterogeneous collaborative learning with black-box knowledge transfer","author":"Hongyan","year":"2019"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3327373"},{"key":"ref139","article-title":"Personalized federated learning for heterogeneous clients with clustered knowledge transfer","author":"Cho","year":"2021"},{"key":"ref140","article-title":"Communication-efficient federated distillation","author":"Sattler","year":"2020"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2021.01.046"},{"key":"ref142","article-title":"Fedgems: Federated learning of larger server models via selective knowledge fusion","author":"Cheng","year":"2021"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2024.3431516"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3129371"},{"key":"ref145","article-title":"Edge bias in federated learning and its solution by buffered knowledge distillation","author":"Lee","year":"2020"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/PIMRC.2019.8904164"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20819"},{"key":"ref148","article-title":"The best of both worlds: Accurate global and personalized models through federated learning with data-free hyper-knowledge distillation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen","year":"2023"},{"key":"ref149","first-page":"14068","article-title":"Group knowledge transfer: Federated learning of large CNNS at the edge","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"He","year":"2020"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1145\/3639369"},{"key":"ref151","first-page":"19504","article-title":"Adversarial collaborative learning on non-IID features","volume-title":"Proc. 40th Int. Conf. Mach. Learn., Ser. Proc. Mach. Learn. Res.","author":"Li","year":"2023"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2023.3272801"},{"key":"ref153","first-page":"12878","article-title":"Data-free knowledge distillation for heterogeneous federated learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhu","year":"2021"},{"key":"ref154","article-title":"Fedbe: Making bayesian model ensemble applicable to federated learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen","year":"2021"},{"key":"ref155","article-title":"Class-wise adaptive self distillation for heterogeneous federated learning","volume-title":"Proc. 36th AAAI Conf. Artif. Intell. Virtual","volume":"22","author":"He","year":"2022"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2022.3189703"},{"key":"ref157","first-page":"11058","article-title":"Multi-level branched regularization for federated learning","volume-title":"Proc. 39th Int. Conf. Mach. Learn. Ser. Proc. Mach. Learn. Res.","volume":"162","author":"Kim","year":"2022"},{"key":"ref158","first-page":"3622","article-title":"Quped: Quantized personalization via distillation with applications to federated learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Ozkara","year":"2021"},{"key":"ref159","article-title":"Data-free one-shot federated learning under very high statistical heterogeneity","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Heinbaugh","year":"2023"},{"key":"ref160","first-page":"21414","article-title":"Dense: Data-free one-shot federated learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Zhang","year":"2022"},{"key":"ref161","article-title":"Training shallow and thin networks for acceleration via knowledge distillation with conditional adversarial networks","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Xu","year":"2018"},{"key":"ref162","article-title":"Learning from a lightweight teacher for efficient knowledge distillation","author":"Liu","year":"2020"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2022.3216566"},{"key":"ref164","first-page":"783","article-title":"KDGAN: Knowledge distillation with generative adversarial networks","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"Wang","year":"2018"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11667"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2773081"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_9"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01232"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00528"},{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.587"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01258-8_15"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11769"},{"key":"ref173","first-page":"4528","article-title":"Progress & compress: A scalable framework for continual learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schwarz","year":"2018"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_33"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00164"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00092"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10294-2"},{"key":"ref179","article-title":"Local-global knowledge distillation in heterogeneous federated learning with non-IID data","author":"Yao","year":"2021"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3225185"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00990"},{"key":"ref182","article-title":"Overcoming forgetting in federated learning on non-IID data","volume-title":"Proc. NeurIPS Workshop","author":"Shoham","year":"2019"},{"key":"ref183","article-title":"Preservation of the global knowledge by not-true self knowledge distillation in federated learning","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Lee","year":"2021"},{"key":"ref184","article-title":"Acceleration of federated learning with alleviated forgetting in local training","author":"Xu","year":"2022"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2024.103156"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2023.3301972"},{"key":"ref187","article-title":"Flashback: Understanding and mitigating forgetting in federated learning","author":"Aljahdali","year":"2024"},{"key":"ref188","article-title":"DFRD: Data-free robustness distillation for heterogeneous federated learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Wang","year":"2024"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.3390\/s23146518"},{"key":"ref190","article-title":"Preservation of the global knowledge by not-true self knowledge distillation in federated learning","author":"Lee","year":"2021"},{"key":"ref191","article-title":"Federated reconnaissance: Efficient, distributed, class-incremental learning","author":"Hendryx","year":"2021"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2023.09.019"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/443"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3334213"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00441"},{"key":"ref196","first-page":"66408","article-title":"A data-free approach to mitigate catastrophic forgetting in federated class incremental learning for vision tasks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Babakniya","year":"2024"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01218"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.02.015"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111491"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.127935"},{"key":"ref201","article-title":"DepthFL: Depthwise federated learning for heterogeneous clients","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Kim","year":"2023"},{"key":"ref202","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02350"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1109\/TNSE.2023.3348461"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i11.21446"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-29763-x"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3324666"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/205"},{"key":"ref208","article-title":"A survey of what to share in federated learning: Perspectives on model utility, privacy leakage, and communication efficiency","author":"Shao","year":"2023"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1145\/2810103.2813677"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1145\/3359789.3359824"},{"key":"ref211","first-page":"9706","article-title":"Variational model inversion attacks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Wang","year":"2021"},{"key":"ref212","first-page":"2742","article-title":"Collective model fusion for multiple black-box experts","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"97","author":"Hoang","year":"2019"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5746"},{"key":"ref214","first-page":"14774","article-title":"Deep leakage from gradients","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Zhu","year":"2019"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/216"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-38794-x"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1109\/DSN58291.2024.00029"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-53641-4_24"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2021.3126052"},{"key":"ref220","article-title":"Asynchronous federated learning with differential privacy for edge intelligence","author":"Li","year":"2019"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796833"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2020.2988575"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413764"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2021.3056991"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33011190"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191850"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28996-5_8"},{"key":"ref228","article-title":"An introduction to domain adaptation and transfer learning","author":"Kouw","year":"2018"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00922"},{"key":"ref230","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00172"},{"key":"ref231","first-page":"26103","article-title":"A mathematical framework for quantifying transferability in multi-source transfer learning","volume":"34","author":"Tong","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref232","first-page":"3274","article-title":"Kd3A: Unsupervised multi-source decentralized domain adaptation via knowledge distillation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Feng","year":"2021"},{"key":"ref233","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI56570.2024.10635548"},{"key":"ref234","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00416"},{"key":"ref235","article-title":"Federated adversarial domain adaptation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Peng","year":"2019"},{"key":"ref236","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2023.3236008"},{"issue":"104096","key":"ref237","article-title":"Knowledge distillation methods for efficient unsupervised adaptation across multiple domains","volume":"108","author":"Belal","year":"2021","journal-title":"Image Vis. Comput."},{"key":"ref238","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00138"},{"key":"ref239","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00809"},{"key":"ref240","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00894"},{"key":"ref241","doi-asserted-by":"publisher","DOI":"10.3390\/s23156986"},{"key":"ref242","article-title":"Multimodal federated learning in healthcare: A review","author":"Thrasher","year":"2023"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403234"},{"key":"ref244","article-title":"Exploring modality collaboration with modality-agnostic transformers in multi-modal federated learning","author":"Sun","year":"2023"},{"key":"ref245","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i10.29007"},{"key":"ref246","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613837"},{"key":"ref247","doi-asserted-by":"publisher","DOI":"10.1145\/3650040"},{"key":"ref248","article-title":"Multimodal federated learning via contrastive representation ensemble","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yu","year":"2023"},{"key":"ref249","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00089"},{"key":"ref250","doi-asserted-by":"publisher","DOI":"10.1049\/cvi2.70013"},{"key":"ref251","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3367329"},{"key":"ref252","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2023.3339091"},{"key":"ref253","article-title":"A survey of incremental transfer learning: Combining peer-to-peer federated learning and domain incremental learning for multicenter collaboration","author":"Huang","year":"2023"},{"key":"ref254","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3363240"},{"key":"ref255","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/137"},{"key":"ref256","article-title":"Accurate forgetting for heterogeneous federated continual learning","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Wuerkaixi","year":"2023"},{"key":"ref257","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892034"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2023.3289444"},{"key":"ref259","first-page":"15434","article-title":"Federated multi-task learning under a mixture of distributions","volume":"34","author":"Marfoq","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref260","doi-asserted-by":"publisher","DOI":"10.1561\/116.00000065"},{"key":"ref261","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3223018"},{"key":"ref262","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i4.25570"},{"key":"ref263","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28680"},{"key":"ref264","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498437"},{"key":"ref265","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2024.3360429"},{"key":"ref266","article-title":"Low-dimensional federated knowledge graph embedding via knowledge distillation","author":"Zhang","year":"2024"},{"key":"ref267","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583305"},{"key":"ref268","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671717"},{"key":"ref269","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28722"},{"key":"ref270","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449898"},{"key":"ref271","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN60899.2024.10651323"},{"key":"ref272","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM53939.2023.10228954"},{"issue":"355","key":"ref273","first-page":"1","article-title":"Beyond spectral gap: The role of the topology in decentralized learning","volume":"24","author":"Vogels","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref274","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2021.3118400"},{"key":"ref275","first-page":"5381","article-title":"A unified theory of decentralized SGD with changing topology and local updates","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Koloskova","year":"2020"},{"key":"ref276","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2021.3118424"},{"key":"ref277","first-page":"22593","article-title":"Distributed distillation for on-device learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Bistritz","year":"2020"},{"key":"ref278","article-title":"Homogenizing non-iid datasets via in-distribution knowledge distillation for decentralized learning","author":"Ravikumar","year":"2023"},{"key":"ref279","article-title":"DFML: Decentralized federated mutual learning","author":"Khalil","year":"2024"},{"key":"ref280","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9412356"},{"key":"ref281","doi-asserted-by":"publisher","DOI":"10.1109\/ICC45041.2023.10279714"},{"key":"ref282","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS47774.2020.00026"},{"key":"ref283","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM52122.2024.10621105"},{"key":"ref284","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS60910.2024.00083"},{"key":"ref285","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20825"},{"key":"ref286","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3649391"},{"key":"ref287","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2021.3118354"},{"key":"ref288","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-009-5152-4"},{"key":"ref289","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7503.003.0022"}],"container-title":["IEEE Transactions on Network Science and Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6488902\/11217296\/11010091.pdf?arnumber=11010091","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T17:40:38Z","timestamp":1761759638000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11010091\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":289,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tnse.2025.3572362","relation":{},"ISSN":["2327-4697","2334-329X"],"issn-type":[{"value":"2327-4697","type":"electronic"},{"value":"2334-329X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11]]}}}