{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T09:38:25Z","timestamp":1773999505170,"version":"3.50.1"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T00:00:00Z","timestamp":1763078400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T00:00:00Z","timestamp":1763078400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,11,14]]},"DOI":"10.1109\/euc66494.2025.00010","type":"proceedings-article","created":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T20:04:00Z","timestamp":1773950640000},"page":"11-16","source":"Crossref","is-referenced-by-count":0,"title":["Adaptive Model Partitioning for Distributed LLM Inference Across Heterogeneous Devices"],"prefix":"10.1109","author":[{"given":"Junda","family":"Wang","sequence":"first","affiliation":[{"name":"College of Information Science and Electronic Engineering, Zhejiang University,Hangzhou,China,310007"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaoyang","family":"Li","sequence":"additional","affiliation":[{"name":"College of Information Science and Electronic Engineering, Zhejiang University,Hangzhou,China,310007"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qianqian","family":"Yang","sequence":"additional","affiliation":[{"name":"College of Information Science and Electronic Engineering, Zhejiang University,Hangzhou,China,310007"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jing","family":"Li","sequence":"additional","affiliation":[{"name":"National Key Laboratory of Complex System Control and Intelligent Agent Cooperation,Beijing,China,100074"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoqiang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Beijing Electro-mechanical Engineering Institute,Beijing,China,102200"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenbo","family":"Zhang","sequence":"additional","affiliation":[{"name":"National Key Laboratory of Complex System Control and Intelligent Agent Cooperation,Beijing,China,100074"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2025.125495"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3641289"},{"key":"ref3","author":"Wu","year":"2024","journal-title":"Fast Distributed Inference Serving for Large Language Models"},{"key":"ref4","author":"Lepikhin","year":"2020","journal-title":"GShard: Scaling giant models with conditional computation and automatic sharding"},{"key":"ref5","first-page":"559","article-title":"Alpa: Automating inter- and intra-operator parallelism for distributed deep learning","volume-title":"Proc. 16th USENIX Symp. Oper. Syst. Des. Implement. (OSDI)","author":"Zheng"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.48550\/arxiv.1811.06965"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"ref8","first-page":"7937","article-title":"Memory-efficient pipeline-parallel DNN training","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Narayanan","year":"2021"},{"key":"ref9","author":"Shoeybi","year":"2020","journal-title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism"},{"key":"ref10","first-page":"521","article-title":"Orca: A distributed serving system for transformer-based generative models","volume-title":"Proc. 16th USENIX Symp. Oper. Syst. Des. Implement. (OSDI)","author":"Yu"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3524255"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.23919\/WiOpt66569.2025.11123401"},{"key":"ref13","author":"Xiong","year":"2025","journal-title":"High-Throughput LLM Inference on Heterogeneous Clusters"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2023.3272567"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3389779"},{"key":"ref16","author":"Chen","year":"2023","journal-title":"Confidant: Customizing transformer-based LLMs via collaborative edge training"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/Ucom59132.2023.10257591"},{"key":"ref18","first-page":"1","article-title":"Efficient large-scale language model training on gpu clusters using megatron-lm","volume-title":"Proc. Int. Conf. High Perform. Comput., Netw., Storage Anal. (SC)","author":"Narayanan"}],"event":{"name":"2025 IEEE 23rd International Conference on Embedded and Ubiquitous Computing (EUC)","location":"Guiyang, China","start":{"date-parts":[[2025,11,14]]},"end":{"date-parts":[[2025,11,17]]}},"container-title":["2025 IEEE 23rd International Conference on Embedded and Ubiquitous Computing (EUC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11433190\/11433180\/11433290.pdf?arnumber=11433290","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T06:07:51Z","timestamp":1773986871000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11433290\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,14]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/euc66494.2025.00010","relation":{},"subject":[],"published":{"date-parts":[[2025,11,14]]}}}