{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T12:00:51Z","timestamp":1775822451014,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":94,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T00:00:00Z","timestamp":1732060800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61673265"],"award-info":[{"award-number":["61673265"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF","award":["2153502, 2403247, 2403398"],"award-info":[{"award-number":["2153502, 2403247, 2403398"]}]},{"name":"AWS Cloud Credit for Research program"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,20]]},"DOI":"10.1145\/3698038.3698509","type":"proceedings-article","created":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T06:32:43Z","timestamp":1731565963000},"page":"178-195","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Pre-Warming is Not Enough: Accelerating Serverless Inference With Opportunistic Pre-Loading"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-2261-5772","authenticated-orcid":false,"given":"Yifan","family":"Sui","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5790-4981","authenticated-orcid":false,"given":"Hanfei","family":"Yu","sequence":"additional","affiliation":[{"name":"Stevens Institute of Technology, Hoboken, 
USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0458-0900","authenticated-orcid":false,"given":"Yitao","family":"Hu","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4205-8561","authenticated-orcid":false,"given":"Jianxun","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1444-2657","authenticated-orcid":false,"given":"Hao","family":"Wang","sequence":"additional","affiliation":[{"name":"Stevens Institute of Technology, Hoboken, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,11,20]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. Alexa Skills - Serverless Applications Lens. https:\/\/docs.aws.amazon.com\/wellarchitected\/latest\/serverless-applications-lens\/alexa-skills.html. Accessed: 2024-07-07."},{"key":"e_1_3_2_1_2_1","unstructured":"2024. Pricing - Microsoft Azure Function. https:\/\/azure.microsoft.com\/en-us\/pricing\/details\/functions\/. Accessed: 2024-07-12."},{"key":"e_1_3_2_1_3_1","volume-title":"Proc. the USENIX Symposium on Networked Systems Design and Implementation (NSDI).","author":"Agache Alexandru","year":"2020","unstructured":"Alexandru Agache, Marc Brooker, Alexandra Iordache, Anthony Liguori, Rolf Neugebauer, Phil Piwonka, and Diana-Maria Popa. 2020. Firecracker: Lightweight Virtualization for Serverless Applications. In Proc. the USENIX Symposium on Networked Systems Design and Implementation (NSDI)."},{"key":"e_1_3_2_1_4_1","volume-title":"Proc. 2018 Usenix Annual Technical Conference (USENIX ATC). 923--935","author":"Akkus Istemi Ekin","year":"2018","unstructured":"Istemi Ekin Akkus, Ruichuan Chen, Ivica Rimac, Manuel Stein, Klaus Satzke, Andre Beck, Paarijaat Aditya, and Volker Hilt. 2018. {SAND}: Towards {High-Performance} Serverless Computing. In Proc. 2018 Usenix Annual Technical Conference (USENIX ATC). 
923--935."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.14778\/3547305.3547313"},{"key":"e_1_3_2_1_7_1","unstructured":"Amazon Web Services. 2023. Optimizing static initialization - AWS Lambda. https:\/\/docs.aws.amazon.com\/lambda\/latest\/operatorguide\/static-initialization.html Accessed on: 2024-06-12."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3524270"},{"key":"e_1_3_2_1_9_1","unstructured":"Apache OpenWhisk. [n.d.]. [n. d.]. https:\/\/openwhisk.apache.org."},{"key":"e_1_3_2_1_10_1","unstructured":"AWS Lambda. 2024. Configure Lambda function memory. https:\/\/docs.aws.amazon.com\/lambda\/latest\/dg\/configuration-memory.html\/. Accessed: 2024-07-07."},{"key":"e_1_3_2_1_11_1","unstructured":"Azure Samples. 2024. Serverless AI Chat with RAG using LangChain.js. https:\/\/learn.microsoft.com\/en-us\/samples\/azure-samples\/serverless-chat-langchainjs\/serverless-chat-langchainjs\/. Accessed: 2024-07-07."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2008.02.003"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486992"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1970.10481180"},{"key":"e_1_3_2_1_15_1","volume-title":"Random forests. Machine learning 45","author":"Breiman Leo","year":"2001","unstructured":"Leo Breiman. 2001. Random forests. Machine learning 45 (2001), 5--32."},{"key":"e_1_3_2_1_16_1","volume-title":"Proc. 2023 USENIX Annual Technical Conference (USENIX ATC). 315--328","author":"Brooker Marc","year":"2023","unstructured":"Marc Brooker, Mike Danilov, Chris Greenwood, and Phil Piwonka. 2023. On-demand Container Loading in {AWS} Lambda. In Proc. 2023 USENIX Annual Technical Conference (USENIX ATC). 
315--328."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3392698"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2024.3386063"},{"key":"e_1_3_2_1_19_1","volume-title":"Proc. Winter USENIX Conference","author":"Cheswick Bill","year":"1992","unstructured":"Bill Cheswick. 1992. An Evening with Berferd in which a cracker is Lured, Endured, and Studied. In Proc. Winter USENIX Conference, San Francisco. 20--24."},{"key":"e_1_3_2_1_20_1","first-page":"20","article-title":"Sla-driven ML Inference Framework for Clouds with Heterogeneous Accelerators","volume":"4","author":"Cho Junguk","year":"2022","unstructured":"Junguk Cho, Diman Zad Tootaghaj, Lianjie Cao, and Puneet Sharma. 2022. Sla-driven ML Inference Framework for Clouds with Heterogeneous Accelerators. Proc. Machine Learning and Systems (MLSys) 4 (2022), 20--32.","journal-title":"Proc. Machine Learning and Systems (MLSys)"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3464298.3476133"},{"key":"e_1_3_2_1_22_1","unstructured":"NVIDIA Corporation. 2024. NVIDIA Multi-Process Service. Software available from NVIDIA. https:\/\/docs.nvidia.com\/deploy\/mps\/index.html Accessed: 2024-05-30."},{"key":"e_1_3_2_1_23_1","volume-title":"Bert: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378512"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421297"},{"key":"e_1_3_2_1_26_1","volume-title":"Costless: Optimizing Cost of Serverless Computing Through Function Fusion and Placement. 
In 2018 IEEE\/ACM Symposium on Edge Computing (SEC).","author":"Elgamal Tarek","year":"2018","unstructured":"Tarek Elgamal. 2018. Costless: Optimizing Cost of Serverless Computing Through Function Fusion and Placement. In 2018 IEEE\/ACM Symposium on Edge Computing (SEC)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2019.11.037"},{"key":"e_1_3_2_1_28_1","volume-title":"ServerlessLLM: Locality-Enhanced Serverless Inference for Large Language Models. arXiv preprint arXiv:2401.14351","author":"Fu Yao","year":"2024","unstructured":"Yao Fu, Leyang Xue, Yeqi Huang, Andrei-Octavian Brabete, Dmitrii Ustiugov, Yuvraj Patel, and Luo Mai. 2024. ServerlessLLM: Locality-Enhanced Serverless Inference for Large Language Models. arXiv preprint arXiv:2401.14351 (2024)."},{"key":"e_1_3_2_1_29_1","unstructured":"Alexander Fuerst. 2021. GitHub---aFuerst\/openwhisk-caching. https:\/\/github.com\/aFuerst\/openwhisk-caching. [Accessed 26-10-2023]."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410276"},{"key":"e_1_3_2_1_31_1","unstructured":"Google. 2018. gVisor. https:\/\/gvisor.dev\/."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3423211.3425683"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11277-020-07402-2"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629567"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645383"},{"key":"e_1_3_2_1_37_1","volume-title":"Proc. the 2020 Sixth International Workshop on Serverless Computing","author":"Hunhoff Erika","year":"2021","unstructured":"Erika Hunhoff, Shazal Irshad, Vijay Thurimella, Ali Tariq, and Eric Rozner. 2021. Proactive Serverless Function Resource Management. In Proc. the 2020 Sixth International Workshop on Serverless Computing (WoSC). 
61\u201366."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472456.3472501"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404119"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3459240"},{"key":"e_1_3_2_1_41_1","unstructured":"Eric Jonas Johann Schleier-Smith Vikram Sreekanti Chia-Che Tsai Anurag Khandelwal Qifan Pu Vaishaal Shankar Joao Carreira Karl Krauth Neeraja Yadwadkar et al. 2019. Cloud Programming Simplified: A Berkeley View on Serverless Computing. arXiv preprint arXiv:1902.03383 (2019)."},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the 2nd International SANE Conference","volume":"43","author":"Kamp Poul-Henning","year":"2000","unstructured":"Poul-Henning Kamp and Robert NM Watson. 2000. Jails: Confining the omnipotent root. In Proceedings of the 2nd International SANE Conference, Vol. 43. 116."},{"key":"e_1_3_2_1_43_1","volume-title":"Proc. 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI). 427--444","author":"Klimovic Ana","year":"2018","unstructured":"Ana Klimovic, Yawen Wang, Patrick Stuedi, Animesh Trivedi, Jonas Pfefferle, and Christos Kozyrakis. 2018. Pocket: Elastic Ephemeral Storage for Serverless Analytics. In Proc. 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI). 427--444."},{"key":"e_1_3_2_1_44_1","volume-title":"Imagenet Classification with Deep Convolutional Neural Networks. Advances in neural information processing systems (NeurIPS) 25","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet Classification with Deep Convolutional Neural Networks. Advances in neural information processing systems (NeurIPS) 25 (2012)."},{"key":"e_1_3_2_1_45_1","unstructured":"Kevin Lee Vijay Rao and William Arnold. 2019. Accelerating Facebook's Infrastructure with Application-Specific Hardware. 
https:\/\/engineering.fb.com\/2019\/03\/14\/data-center-engineering\/accelerating-infrastructure\/. Accessed: 2024-07-07."},{"key":"e_1_3_2_1_46_1","volume-title":"Proc. 2022 USENIX Annual Technical Conference (USENIX ATC).","author":"Li Jie","year":"2022","unstructured":"Jie Li, Laiping Zhao, Yanan Yang, Kunlin Zhan, and Keqiu Li. 2022. Tetris: Memory-Efficient Serverless inference through tensor sharing. In Proc. 2022 USENIX Annual Technical Conference (USENIX ATC)."},{"key":"e_1_3_2_1_47_1","unstructured":"Zijun Li. [n. d.]. GitHub---lzjzx1122\/Pagurus: Help Rather Than Recycle: Alleviating Cold Startup in Serverless Computing Through Inter-Function Container Sharing. https:\/\/github.com\/lzjzx1122\/Pagurus\/tree\/master. [Accessed 26-10-2023]."},{"key":"e_1_3_2_1_48_1","volume-title":"Proc. 2022 USENIX Annual Technical Conference (USENIX ATC). 69--84","author":"Li Zijun","year":"2022","unstructured":"Zijun Li, Linsong Guo, Quan Chen, Jiagan Cheng, Chuhao Xu, Deze Zeng, Zhuo Song, Tao Ma, Yong Yang, Chao Li, and Minyi Guo. 2022. Help Rather Than Recycle: Alleviating Cold Startup in Serverless Computing Through {Inter-Function} Container Sharing. In Proc. 2022 USENIX Annual Technical Conference (USENIX ATC). 69--84."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477113.3487273"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640361"},{"key":"e_1_3_2_1_51_1","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI).","author":"Mahgoub Ashraf","year":"2022","unstructured":"Ashraf Mahgoub, Edgardo Barsallo Yi, Karthick Shankar, Sameh Elnikety, Somali Chaterji, and Saurabh Bagchi. 2022. {ORION} and the Three Rights: Sizing, Bundling, and Prewarming for Serverless {DAGs}. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI)."},{"key":"e_1_3_2_1_52_1","unstructured":"Microsoft. 2023. Azure Functions warmup trigger. 
https:\/\/learn.microsoft.com\/en-us\/azure\/azure-functions\/functions-bindings-warmup?tabs=isolated-process%2Cnodejs-v4&pivots=programming-language-python Accessed: 2024-06-12."},{"key":"e_1_3_2_1_53_1","unstructured":"Microsoft. 2024. Azure Functions Premium plan. https:\/\/learn.microsoft.com\/en-us\/azure\/azure-functions\/functions-premium-plan?tabs=portal Accessed: 2024-07-12."},{"key":"e_1_3_2_1_54_1","unstructured":"Microsoft. 2024. Storage considerations for Azure Functions. https:\/\/learn.microsoft.com\/en-us\/azure\/azure-functions\/storage-considerations?tabs=azure-cli. Accessed: 2024-07-01."},{"key":"e_1_3_2_1_55_1","volume-title":"Agile Cold Starts for Scalable Serverless. In 11th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud).","author":"Mohan Anup","year":"2019","unstructured":"Anup Mohan, Harshad Sane, Kshitij Doshi, Saikrishna Edupuganti, Naren Nayak, and Vadim Sukhomlinov. 2019. Agile Cold Starts for Scalable Serverless. In 11th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456239"},{"key":"e_1_3_2_1_57_1","volume-title":"Nuclio: Serverless Platform for Automated Data Science. https:\/\/nuclio.io\/ Accessed: 2024-07-12.","year":"2024","unstructured":"Nuclio. 2024. Nuclio: Serverless Platform for Automated Data Science. https:\/\/nuclio.io\/ Accessed: 2024-07-12."},{"key":"e_1_3_2_1_58_1","unstructured":"NVIDIA Corporation. 2024. NVIDIA Container Toolkit. Software available from NVIDIA. https:\/\/github.com\/NVIDIA\/nvidia-container-toolkit Accessed: 2024-05-30."},{"key":"e_1_3_2_1_59_1","volume-title":"Proc. the USENIX Conference on Usenix Annual Technical Conference (USENIX ATC).","author":"Oakes Edward","unstructured":"Edward Oakes, Leon Yang, Dennis Zhou, Kevin Houck, Tyler Harter, Andrea C. Arpaci-Dusseau, and Remzi H. Arpaci-Dusseau. 2018. SOCK: Rapid Task Provisioning with Serverless-Optimized Containers. In Proc. 
the USENIX Conference on Usenix Annual Technical Conference (USENIX ATC)."},{"key":"e_1_3_2_1_60_1","volume-title":"Learning dynamic app usage graph for next mobile app recommendation","author":"Ouyang Yi","year":"2022","unstructured":"Yi Ouyang, Bin Guo, Qianru Wang, Yunji Liang, and Zhiwen Yu. 2022. Learning dynamic app usage graph for next mobile app recommendation. IEEE Transactions on mobile Computing (2022)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796705"},{"key":"e_1_3_2_1_62_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems (NeurIPS). 8024--8035."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620678.3624664"},{"key":"e_1_3_2_1_64_1","volume-title":"Proc. 16th USENIX symposium on networked systems design and implementation (NSDI). 193--206","author":"Pu Qifan","year":"2019","unstructured":"Qifan Pu, Shivaram Venkataraman, and Ion Stoica. 2019. Shuffling, Fast and Slow: Scalable Analytics on Serverless Infrastructure. In Proc. 16th USENIX symposium on networked systems design and implementation (NSDI). 193--206."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486974"},{"key":"e_1_3_2_1_66_1","volume-title":"Proc. 2021 USENIX Annual Technical Conference (USENIX ATC). 397--411","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J Yadwadkar, and Christos Kozyrakis. 2021. {INFaaS}: Automated Model-less Inference Serving. In Proc. 
2021 USENIX Annual Technical Conference (USENIX ATC). 397--411."},{"key":"e_1_3_2_1_67_1","volume-title":"DayDream: Executing Dynamic Scientific Workflows on Serverless Platforms with Hot Starts. In 2022 SC22: International Conference for High Performance Computing, Networking, Storage and Analysis (SC).","author":"Roy Rohan Basu","year":"2022","unstructured":"Rohan Basu Roy, Tirthak Patel, and Devesh Tiwari. 2022. DayDream: Executing Dynamic Scientific Workflows on Serverless Platforms with Hot Starts. In 2022 SC22: International Conference for High Performance Computing, Networking, Storage and Analysis (SC)."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507750"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3524272"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3524272"},{"key":"e_1_3_2_1_71_1","volume-title":"FUSIONIZE: Improving Serverless Application Performance Through Feedback-driven Function Fusion. In 2022 IEEE International Conference on Cloud Engineering (IC2E).","author":"Schirmer Trever","year":"2022","unstructured":"Trever Schirmer, Joel Scheuner, Tobias Pfandzelter, and David Bermbach. 2022. FUSIONIZE: Improving Serverless Application Performance Through Feedback-driven Function Fusion. In 2022 IEEE International Conference on Cloud Engineering (IC2E)."},{"key":"e_1_3_2_1_72_1","volume-title":"Proc. 2020 USENIX Annual Technical Conference (USENIX ATC). 205--218","author":"Shahrad Mohammad","year":"2020","unstructured":"Mohammad Shahrad, Rodrigo Fonseca, Inigo Goiri, Gohar Chaudhry, Paul Batum, Jason Cooke, Eduardo Laureano, Colby Tresness, Mark Russinovich, and Ricardo Bianchini. 2020. Serverless in the wild: Characterizing and optimizing the serverless workload at a large cloud provider. In Proc. 2020 USENIX Annual Technical Conference (USENIX ATC). 
205--218."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304016"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3551901.3556492"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3423211.3425682"},{"key":"e_1_3_2_1_76_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071120"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410279"},{"key":"e_1_3_2_1_81_1","volume-title":"Proc. 18th USENIX conference on file and storage technologies (FAST). 267--281","author":"Wang Ao","year":"2020","unstructured":"Ao Wang, Jingyuan Zhang, Xiaolong Ma, Ali Anwar, Lukas Rupprecht, Dimitrios Skourtis, Vasily Tarasov, Feng Yan, and Yue Cheng. 2020. {InfiniCache}: Exploiting Ephemeral Serverless Functions to Build a {Cost-Effective} Memory Cache. In Proc. 18th USENIX conference on file and storage technologies (FAST). 267--281."},{"key":"e_1_3_2_1_82_1","volume-title":"Proc. the Seventeenth European Conference on Computer Systems (EuroSys). 1--16","author":"Amy Wang Kai-Ting","year":"2019","unstructured":"Kai-Ting Amy Wang, Rayson Ho, and Peng Wu. 2019. Replayable execution optimized for page sharing for a managed runtime environment. In Proc. the Seventeenth European Conference on Computer Systems (EuroSys). 1--16."},{"key":"e_1_3_2_1_83_1","volume-title":"USENIX Symposium on Operating Systems Design and Implementation (OSDI). 
497--517","author":"Wei Xingda","year":"2023","unstructured":"Xingda Wei, Fangming Lu, Tianxia Wang, Jinyu Gu, Yuhan Yang, Rong Chen, and Haibo Chen. 2023. No Provisioned Concurrency: Fast {RDMA-codesigned} Remote Fork for Serverless Computing. In USENIX Symposium on Operating Systems Design and Implementation (OSDI). 497--517."},{"key":"e_1_3_2_1_84_1","volume-title":"High-Throughput Inference. In Proc. the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 768--781","author":"Yang Yanan","year":"2022","unstructured":"Yanan Yang, Laiping Zhao, Yiming Li, Huanyu Zhang, Jie Li, Mingyang Zhao, Xingzhen Chen, and Keqiu Li. 2022. INFless: A Native Serverless System for Low-Latency, High-Throughput Inference. In Proc. the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 768--781."},{"key":"e_1_3_2_1_85_1","volume-title":"Proc. the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 335--350","author":"Yu Hanfei","year":"2024","unstructured":"Hanfei Yu, Rohan Basu Roy, Christian Fontenot, Devesh Tiwari, Jian Li, Hong Zhang, Hao Wang, and Seung-Jong Park. 2024. Rainbow-Cake: Mitigating Cold-starts in Serverless with Layer-wise Container Caching and Sharing. In Proc. the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 335--350."},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588195.3592996"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3511979"},{"key":"e_1_3_2_1_88_1","volume-title":"Proc. 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI). 1489--1504","author":"Yu Minchen","year":"2023","unstructured":"Minchen Yu, Tingjia Cao, Wei Wang, and Ruichuan Chen. 2023. 
Following the Data, not the Function: Rethinking Function Orchestration in Serverless Computing. In Proc. 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI). 1489--1504."},{"key":"e_1_3_2_1_89_1","volume-title":"GPU-Efficient Serverless Inference via Model Swapping. arXiv preprint arXiv:2306.03622","author":"Yu Minchen","year":"2023","unstructured":"Minchen Yu, Ao Wang, Dong Chen, Haoxuan Yu, Xiaonan Luo, Zhuohao Li, Wei Wang, Ruichuan Chen, Dapeng Nie, and Haoran Yang. 2023. FaaSwap: SLO-Aware, GPU-Efficient Serverless Inference via Model Swapping. arXiv preprint arXiv:2306.03622 (2023)."},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421280"},{"key":"e_1_3_2_1_91_1","volume-title":"Proc. 2019 USENIX Annual Technical Conference (USENIX ATC). 1049--1062","author":"Zhang Chengliang","year":"2019","unstructured":"Chengliang Zhang, Minchen Yu, Wei Wang, and Feng Yan. 2019. {MArk}: Exploiting Cloud Services for {Cost-Effective}, {SLO-Aware} Machine Learning Inference Serving. In Proc. 2019 USENIX Annual Technical Conference (USENIX ATC). 1049--1062."},{"key":"e_1_3_2_1_92_1","volume-title":"Faster and Cheaper Serverless Computing on Harvested Resources. In ACM SIGOPS 28th Symposium on Operating Systems Principles (SOSP).","author":"Zhang Yanqi","year":"2021","unstructured":"Yanqi Zhang, \u00cd\u00f1igo Goiri, Gohar Irfan Chaudhry, Rodrigo Fonseca, Sameh Elnikety, Christina Delimitrou, and Ricardo Bianchini. 2021. Faster and Cheaper Serverless Computing on Harvested Resources. 
In ACM SIGOPS 28th Symposium on Operating Systems Principles (SOSP)."},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2874014"},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.1145\/3567955.3567960"}],"event":{"name":"SoCC '24: ACM Symposium on Cloud Computing","location":"Redmond WA USA","acronym":"SoCC '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3698038.3698509","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3698038.3698509","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:01:55Z","timestamp":1755889315000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3698038.3698509"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,20]]},"references-count":94,"alternative-id":["10.1145\/3698038.3698509","10.1145\/3698038"],"URL":"https:\/\/doi.org\/10.1145\/3698038.3698509","relation":{},"subject":[],"published":{"date-parts":[[2024,11,20]]},"assertion":[{"value":"2024-11-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}