{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,25]],"date-time":"2025-12-25T05:46:09Z","timestamp":1766641569372,"version":"3.48.0"},"reference-count":76,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61673265"],"award-info":[{"award-number":["61673265"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF","award":["2527416"],"award-info":[{"award-number":["2527416"]}]},{"name":"NSF","award":["2534241"],"award-info":[{"award-number":["2534241"]}]},{"name":"AWS Cloud Credit for Research program"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/tpds.2025.3638428","type":"journal-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:44:23Z","timestamp":1764355463000},"page":"472-488","source":"Crossref","is-referenced-by-count":0,"title":["Accelerating ML Inference via Opportunistic Pre-Loading on Serverless Clusters"],"prefix":"10.1109","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-2261-5772","authenticated-orcid":false,"given":"Yifan","family":"Sui","sequence":"first","affiliation":[{"name":"Department of Automation of the School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5790-4981","authenticated-orcid":false,"given":"Hanfei","family":"Yu","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Stevens Institute of Technology, Hoboken, NJ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0458-0900","authenticated-orcid":false,"given":"Yitao","family":"Hu","sequence":"additional","affiliation":[{"name":"Tianjin Key Laboratory of Advanced Networking of the Department of Intelligence and Computing, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4205-8561","authenticated-orcid":false,"given":"Jianxun","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Automation of the School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1444-2657","authenticated-orcid":false,"given":"Hao","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Stevens Institute of Technology, Hoboken, NJ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3698038.3698509"},{"article-title":"Accelerating Facebook\u2019s infrastructure with application-specific hardware","year":"2019","author":"Lee","key":"ref2"},{"year":"2023","key":"ref3","article-title":"Alexa skills - serverless applications lens"},{"year":"2024","key":"ref4","article-title":"Serverless AI chat with RAG using LangChain.js"},{"year":"2024","key":"ref5","article-title":"Nuclio: Serverless platform for automated data science"},{"article-title":"ServerlessLLM: Locality-enhanced serverless inference for large language models","year":"2024","author":"Fu","key":"ref6"},{"article-title":"Cloud programming simplified: A berkeley view on serverless computing","year":"2019","author":"Jonas","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378512"},{"key":"ref9","first-page":"923","article-title":"$\\lbrace${SAND $\\rbrace$}: Towards $\\lbrace${ High-Performance$\\rbrace$} serverless computing","volume-title":"Proc. 2018 Usenix Annu. Tech. Conf.","author":"Akkus","year":"2018"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304016"},{"key":"ref11","first-page":"315","article-title":"On-demand container loading in AWS Lambda","volume-title":"Proc. 2023 USENIX Annu. Tech. Conf.","author":"Brooker"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071120"},{"key":"ref13","first-page":"69","article-title":"Help rather than recycle: Alleviating cold startup in serverless computing through $\\lbrace${Inter-Function$\\rbrace$} container sharing","volume-title":"Proc. 2022 USENIX Annu. Tech. Conf.","author":"Li"},{"key":"ref14","first-page":"205","article-title":"Serverless in the wild: Characterizing and optimizing the serverless workload at a large cloud provider","volume-title":"Proc. 2020 USENIX Annu. Tech. Conf.","author":"Shahrad"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446757"},{"article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"Devlin","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629567"},{"key":"ref18","first-page":"473","article-title":"Tetris: Memory-efficient serverless inference through tensor sharing","volume-title":"Proc. 2022 USENIX Annu. Tech. Conf.","author":"Li","year":"2022"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3620678.3624664"},{"year":"2023","key":"ref20","article-title":"Azure functions warmup trigger"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446714"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3524270"},{"year":"2023","key":"ref23","article-title":"Optimizing static initialization - AWS Lambda"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3429880.3430102"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3464298.3476133"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507750"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3423211.3425683"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796705"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00027"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3392698"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3477113.3487273"},{"article-title":"Agile cold starts for scalable serverless","volume-title":"Proc. 11th USENIX Workshop Hot Topics Cloud Comput.","author":"Mohan","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486992"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2024.3386063"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624871"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/SEC.2018.00029"},{"key":"ref37","first-page":"303","article-title":"$\\lbrace${ORION $\\rbrace$} and the three rights: Sizing, bundling, and prewarming for serverless $\\lbrace${ DAGs$\\rbrace$}","volume-title":"Proc. 16th USENIX Symp. Operating Syst. Des. Implementation","author":"Mahgoub","year":"2022"},{"key":"ref38","first-page":"57","article-title":"SOCK: Rapid task provisioning with serverless-optimized containers","volume-title":"Proc. USENIX Conf. Usenix Annu. Tech. Conf.","author":"Oakes"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E55432.2022.00017"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3524272"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303978"},{"key":"ref42","first-page":"419","article-title":"Firecracker: Lightweight virtualization for serverless applications","volume-title":"Proc. USENIX Symp. Networked Syst. Des. Implementation","author":"Agache"},{"year":"2018","key":"ref43","article-title":"gVisor"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3423211.3425682"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3524272"},{"key":"ref46","first-page":"497","article-title":"No provisioned concurrency: Fast $\\lbrace${RDMA-codesigned$\\rbrace$} remote fork for serverless computing","volume-title":"Proc. USENIX Symp. Operating Syst. Des. Implementation","author":"Wei"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3477132.3483580"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3588195.3592996"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3511979"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3567955.3567960"},{"key":"ref51","first-page":"397","article-title":"$\\lbrace${INFaaS$\\rbrace$}: Automated model-less inference serving","volume-title":"Proc. 2021 USENIX Annu. Tech. Conf.","author":"Romero"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2019.11.037"},{"year":"2024","key":"ref54","article-title":"Configure lambda function memory"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421280"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68279-0_8"},{"key":"ref57","first-page":"20","article-title":"An evening with berferd in which a cracker is lured, endured, and studied","volume-title":"Proc. Winter USENIX Conf.","author":"Cheswick"},{"key":"ref58","article-title":"NVIDIA multi-process service","volume-title":"Softw. Available From NVIDIA","author":"Corporation","year":"2024"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"article-title":"Very deep convolutional networks for large-scale image recognition","year":"2014","author":"Simonyan","key":"ref62"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.4324\/9781410605337-29"},{"article-title":"GitHub\u2014Pagurus","year":"2022","author":"Li","key":"ref64"},{"year":"2024","key":"ref65","article-title":"Azure functions premium plan"},{"year":"2024","key":"ref66","article-title":"Pricing - microsoft azure function"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/3472456.3472501"},{"key":"ref68","first-page":"20","article-title":"SLA-driven ML inference framework for clouds with heterogeneous accelerators","volume-title":"Proc. Mach. Learn. Syst.","volume":"4","author":"Cho"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3459240"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421297"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.14778\/3547305.3547313"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507709"},{"key":"ref74","first-page":"427","article-title":"Pocket: Elastic ephemeral storage for serverless analytics","volume-title":"Proc. 13th USENIX Symp. Operating Syst. Des. Implementation","author":"Klimovic"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456239"},{"key":"ref76","first-page":"267","article-title":"$\\lbrace${InfiniCache $\\rbrace$}: Exploiting ephemeral serverless functions to build a $\\lbrace${ Cost-Effective$\\rbrace$} memory cache","volume-title":"Proc. 18th USENIX Conf. File Storage Technol.","author":"Wang","year":"2020"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486974"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/71\/11299490\/11271332-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/11299490\/11271332.pdf?arnumber=11271332","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,25]],"date-time":"2025-12-25T05:42:07Z","timestamp":1766641327000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11271332\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":76,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2025.3638428","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"type":"print","value":"1045-9219"},{"type":"electronic","value":"1558-2183"},{"type":"electronic","value":"2161-9883"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}