{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T21:24:32Z","timestamp":1773696272195,"version":"3.50.1"},"reference-count":58,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62572280"],"award-info":[{"award-number":["62572280"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["U24A20244"],"award-info":[{"award-number":["U24A20244"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["U23A20302"],"award-info":[{"award-number":["U23A20302"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. 
Syst."],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1109\/tpds.2026.3666309","type":"journal-article","created":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T21:00:06Z","timestamp":1771534806000},"page":"1048-1061","source":"Crossref","is-referenced-by-count":0,"title":["Fed-Grow: Federating to Grow Transformers for Resource-Constrained Users Without Model Sharing"],"prefix":"10.1109","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1690-5678","authenticated-orcid":false,"given":"Shikun","family":"Shen","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4579-5380","authenticated-orcid":false,"given":"Yifei","family":"Zou","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3976-0870","authenticated-orcid":false,"given":"Yuan","family":"Yuan","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Shandong University, Qingdao, China"}]},{"given":"Hanlin","family":"Gu","sequence":"additional","affiliation":[{"name":"WeBank AI Lab, WeBank, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5303-0700","authenticated-orcid":false,"given":"Peng","family":"Li","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Engineering, Xi’an Jiaotong University, Xi’an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5912-4647","authenticated-orcid":false,"given":"Xiuzhen","family":"Cheng","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1989-1750","authenticated-orcid":false,"given":"Falko","family":"Dressler","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, 
Technical University, Berlin, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6835-5981","authenticated-orcid":false,"given":"Dongxiao","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Shandong University, Qingdao, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"issue":"8","key":"ref3","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref4","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Brown","year":"2020"},{"key":"ref5","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dosovitskiy","year":"2021"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref7","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Touvron","year":"2021"},{"key":"ref8","article-title":"GPT-4 technical report","author":"Achiam","year":"2023"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2024.3385639"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/764"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2024.3476390"},{"key":"ref12","article-title":"Megatron-LM: Training multi-billion parameter language models using model parallelism","author":"Shoeybi","year":"2019"},{"key":"ref13","article-title":"Large batch optimization for deep learning: Training BERT in 76 minutes","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"You","year":"2020"},{"key":"ref14","first-page":"14011","article-title":"Accelerating training of transformer-based language models with progressive layer dropping","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Zhang","year":"2020"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.262"},{"key":"ref16","first-page":"2337","article-title":"Efficient training of BERT by progressively stacking","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gong","year":"2019"},{"key":"ref17","article-title":"Progressively stacking 2.0: A multi-stage layerwise training method for BERT training speedup","author":"Yang","year":"2020"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.151"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.288"},{"key":"ref20","article-title":"Learning to grow pretrained models for efficient transformer training","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Wang","year":"2023"},{"key":"ref21","article-title":"LEMON: Lossless model expansion","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Wang","year":"2024"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01573"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0144"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2020.10.007"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3724113"},{"key":"ref26","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Hu","year":"2022"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2017.41"},{"key":"ref29","first-page":"19974","article-title":"Chasing sparsity in vision transformers: An end-to-end exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Chen","year":"2021"},{"key":"ref30","article-title":"Net2Net: Accelerating learning via knowledge transfer","author":"Chen","year":"2015"},{"key":"ref31","article-title":"FLM-101B: An open LLM and how to train it with $100k budget","author":"Li","year":"2023"},{"key":"ref32","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. Artif. Intell. Statist.","author":"McMahan","year":"2017"},{"key":"ref33","first-page":"429","article-title":"Federated optimization in heterogeneous networks","volume-title":"Proc. Mach. Learn. Syst.","volume":"2","author":"Li","year":"2020"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20825"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM52122.2024.10621164"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-29763-x"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00061"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS63350.2024.00040"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02250"},{"key":"ref40","article-title":"FedCLIP: Fast generalization and personalization for CLIP in federated learning","volume-title":"Proc. ICLR 2023 Workshop Trustworthy Reliable Large-Scale Mach. Learn. 
Models","author":"Lu","year":"2023"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1245"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ijcnn64981.2025.11228793"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2451"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.3390\/electronics13091738"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/WorldS450073.2020.9210355"},{"issue":"2","key":"ref46","article-title":"FedQMIX: Communication-efficient federated learning via multi-agent reinforcement learning","volume-title":"High-Confidence Comput.","volume":"4","author":"Cao","year":"2024"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50048-7"},{"key":"ref48","article-title":"Character-level convolutional networks for text classification","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Zhang","year":"2015"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.215"},{"key":"ref50","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"ref52","article-title":"Flower: A friendly federated learning research framework","author":"Beutel","year":"2020"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-naacl.13"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE53745.2022.00077"},{"key":"ref55","first-page":"17716","article-title":"Federated learning with partial model personalization","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Pillutla","year":"2022"},{"issue":"3","key":"ref56","article-title":"DEFEAT: A decentralized federated learning against gradient attacks","volume-title":"High-Confidence Comput.","volume":"3","author":"Lu","year":"2023"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/2976749.2978318"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2021.3093711"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/11434582\/11399916.pdf?arnumber=11399916","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T20:17:01Z","timestamp":1773692221000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11399916\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":58,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2026.3666309","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5]]}}}