{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T21:14:40Z","timestamp":1773090880369,"version":"3.50.1"},"reference-count":32,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272261"],"award-info":[{"award-number":["62272261"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Municipal Science and Technology Project","award":["Z231100010323005"],"award-info":[{"award-number":["Z231100010323005"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1109\/tc.2025.3575905","type":"journal-article","created":{"date-parts":[[2025,6,3]],"date-time":"2025-06-03T13:50:11Z","timestamp":1748958611000},"page":"2799-2811","source":"Crossref","is-referenced-by-count":6,"title":["Serving MoE Models on Resource-Constrained Edge Devices via Dynamic Expert Swapping"],"prefix":"10.1109","volume":"74","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2889-2266","authenticated-orcid":false,"given":"Rui","family":"Kong","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1591-2526","authenticated-orcid":false,"given":"Yuanchun","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9545-3322","authenticated-orcid":false,"given":"Weijun","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9266-3044","authenticated-orcid":false,"given":"Linghe","family":"Kong","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7352-8955","authenticated-orcid":false,"given":"Yunxin","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Scaling laws for neural language models","author":"Kaplan","year":"2020"},{"key":"ref2","first-page":"8583","article-title":"Scaling vision with sparse mixture of experts","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Riquelme","year":"2021"},{"key":"ref3","article-title":"Sparse mixture-of-experts are domain generalizable learners","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Li","year":"2023"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20858"},{"issue":"120","key":"ref5","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus","year":"2022","journal-title":"J. Mach. Learn. Res."},{"key":"ref6","article-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Shazeer","year":"2017"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-478"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSLP57327.2022.10037818"},{"key":"ref9","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Proc. Adv. Neural Inf. Process. Syst."},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"ref11","article-title":"Tutel: Adaptive mixture-of-experts at scale","author":"Hwang","year":"2022"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.804"},{"key":"ref14","article-title":"Attention is all you need","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Vaswani","year":"2017"},{"key":"ref15","article-title":"M3vit: Mixture-of-experts vision transformer for efficient multi-task learning with model-accelerator co-design","author":"Liang","year":"2022","journal-title":"Adv. Neural Inf. Proces. Syst."},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.71"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02068"},{"key":"ref18","article-title":"Pointer sentinel mixture models","author":"Merity","year":"2016"},{"key":"ref19","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dosovitskiy","year":"2021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref21","article-title":"Towards MoE deployment: Mitigating inefficiencies in mixture-of-expert (MoE) inference","author":"Huang","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3467882"},{"key":"ref23","article-title":"Fast inference of mixture-of-experts language models with offloading","author":"Eliseev","year":"2023"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00066"},{"key":"ref25","article-title":"Sida-MoE: Sparsity-inspired data-aware serving for efficient and scalable large mixture-of-experts models","author":"Du","year":"2024"},{"key":"ref26","doi-asserted-by":"crossref","DOI":"10.1109\/ISCA59077.2024.00078","article-title":"Pre-gated MoE: An algorithm-system co-design for fast and scalable mixture-of-expert inference","author":"Hwang","year":"2024"},{"key":"ref27","article-title":"EdgeMoE: Fast on-device inference of MoE-based large language models","author":"Yi","year":"2023"},{"key":"ref28","article-title":"Gshard: Scaling giant models with conditional computation and automatic sharding","author":"Lepikhin","year":"2020"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.304"},{"key":"ref30","article-title":"Task-specific expert pruning for sparse mixture-of-experts","author":"Chen","year":"2022"},{"key":"ref31","article-title":"Parameter-efficient mixture-of-experts architecture for pre-trained language models","author":"Gao","year":"2022"},{"key":"ref32","article-title":"AutoMoE: Neural architecture search for efficient sparsely activated transformers","author":"Jawahar","year":"2022"}],"container-title":["IEEE Transactions on Computers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/12\/11077790\/11022729.pdf?arnumber=11022729","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T17:43:48Z","timestamp":1752255828000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11022729\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8]]},"references-count":32,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tc.2025.3575905","relation":{},"ISSN":["0018-9340","1557-9956","2326-3814"],"issn-type":[{"value":"0018-9340","type":"print"},{"value":"1557-9956","type":"electronic"},{"value":"2326-3814","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8]]}}}