{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T22:57:36Z","timestamp":1768517856243,"version":"3.49.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,14]],"date-time":"2025-12-14T00:00:00Z","timestamp":1765670400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,14]],"date-time":"2025-12-14T00:00:00Z","timestamp":1765670400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62402141,U22A2036"],"award-info":[{"award-number":["62402141,U22A2036"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,14]]},"DOI":"10.1109\/icpads67057.2025.11323056","type":"proceedings-article","created":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T20:36:54Z","timestamp":1768423014000},"page":"1-10","source":"Crossref","is-referenced-by-count":0,"title":["ServerlessLego: An Elastic Serverless Framework Assembling Model Building Blocks to Provide SLO-Aware Inference Services"],"prefix":"10.1109","author":[{"given":"Yiting","family":"Li","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Harbin Institute of Technology,Shenzhen,China"}]},{"given":"Desheng","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Harbin Institute of Technology,Shenzhen,China"}]},{"given":"Weizhe","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Harbin Institute of Technology,Shenzhen,China"}]},{"given":"Sichao","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Cyberspace Science, Harbin Institute of Technology,Harbin,China"}]},{"given":"Yuming","family":"Feng","sequence":"additional","affiliation":[{"name":"Pengcheng Laboratory,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645347"},{"key":"ref2","author":"Huang","year":"2024","journal-title":"Soap: enhancing efficiency of generated code via self-optimization"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3387941"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocae166"},{"key":"ref5","first-page":"135","article-title":"Serverlessllm:low-latency serverless inference for large language models","volume-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24)","author":"Fu","year":"2024"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3702634.3702950"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3510611"},{"key":"ref8","first-page":"1049","article-title":"Mark: Exploiting cloud services for cost-effective, slo-aware machine learning inference serving","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Zhang","year":"2019"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/icpads60453.2023.00350"},{"key":"ref10","author":"Yu","year":"2023","journal-title":"Faaswap: Slo-aware, gpu-efficient serverless inference via model swapping"},{"key":"ref11","author":"Hu","year":"2025","journal-title":"Deepserve: Serverless large language model serving at scale"},{"key":"ref12","volume-title":"Openwhisk resource limit"},{"key":"ref13","volume-title":"Azure function resource limit"},{"key":"ref14","volume-title":"Lambda function resource limit"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707215"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303953"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS51616.2021.00022"},{"key":"ref18","author":"Duan","year":"2024","journal-title":"Mopar: A model partitioning framework for deep learning inference services on serverless platforms"},{"key":"ref19","first-page":"69","article-title":"Help rather than recycle: Alleviating cold startup in serverless computing through {Inter-Function} container sharing","volume-title":"2022 USENIX annual technical conference (USENIX ATC 22)","author":"Li","year":"2022"},{"key":"ref20","first-page":"57","article-title":"Sock: Rapid task provisioning with serverlessoptimized containers","volume-title":"2018 USENIX annual technical conference (USENIX ATC 18)","author":"Oakes","year":"2018"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3392698"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446757"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507750"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378512"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629567"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3477113.3487273"},{"key":"ref27","first-page":"923","article-title":"Sand: towards high-performance serverless computing","volume-title":"2018 USENIX annual technical conference (USENIX ATC 18)","author":"Akkus","year":"2018"},{"key":"ref28","first-page":"419","article-title":"Firecracker: Lightweight virtualization for serverless applications","volume-title":"17th USENIX symposium on networked systems design and implementation (NSDI 20)","author":"Agache","year":"2020"},{"key":"ref29","year":"2018","journal-title":"Google gvisor"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2024.3391858"}],"event":{"name":"2025 IEEE 31th International Conference on Parallel and Distributed Systems (ICPADS)","location":"Hefei, China","start":{"date-parts":[[2025,12,14]]},"end":{"date-parts":[[2025,12,18]]}},"container-title":["2025 IEEE 31th International Conference on Parallel and Distributed Systems (ICPADS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11322805\/11322871\/11323056.pdf?arnumber=11323056","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T07:09:24Z","timestamp":1768460964000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11323056\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,14]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/icpads67057.2025.11323056","relation":{},"subject":[],"published":{"date-parts":[[2025,12,14]]}}}