{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:53:19Z","timestamp":1773193999328,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,22]]},"DOI":"10.1145\/3627703.3629567","type":"proceedings-article","created":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T06:28:28Z","timestamp":1713421708000},"page":"1039-1053","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":26,"title":["Optimus: Warming Serverless ML Inference via Inter-Function Model Transformation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5689-382X","authenticated-orcid":false,"given":"Zicong","family":"Hong","sequence":"first","affiliation":[{"name":"Hong Kong Polytechnic University"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9130-7320","authenticated-orcid":false,"given":"Jian","family":"Lin","sequence":"additional","affiliation":[{"name":"Shantou University"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9831-2202","authenticated-orcid":false,"given":"Song","family":"Guo","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1473-1795","authenticated-orcid":false,"given":"Sifu","family":"Luo","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4430-7904","authenticated-orcid":false,"given":"Wuhui","family":"Chen","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University and Peng Cheng Laboratory"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6339-3134","authenticated-orcid":false,"given":"Roger","family":"Wattenhofer","sequence":"additional","affiliation":[{"name":"ETH Zurich"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9865-2212","authenticated-orcid":false,"given":"Yue","family":"Yu","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory"}]}],"member":"320","published-online":{"date-parts":[[2024,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3567496"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 2018 USENIX Conference on Usenix Annual Technical Conference (Boston, MA, USA) (USENIX ATC '18). USENIX Association, USA, 923--935","author":"Akkus Istemi Ekin","year":"2018","unstructured":"Istemi Ekin Akkus, Ruichuan Chen, Ivica Rimac, Manuel Stein, Klaus Satzke, Andre Beck, Paarijaat Aditya, and Volker Hilt. 2018. SAND: Towards High-Performance Serverless Computing. In Proceedings of the 2018 USENIX Conference on Usenix Annual Technical Conference (Boston, MA, USA) (USENIX ATC '18). USENIX Association, USA, 923--935."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.14778\/3547305.3547313"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3567503"},{"key":"e_1_3_2_1_6_1","volume-title":"Retrieved","year":"2023","unstructured":"Azure. 2023. Azure Functions: Execute event-driven serverless code functions with an end-to-end development experience. Retrieved March 20, 2023 from https:\/\/azure.microsoft.com\/en-us\/products\/functions\/"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3567506"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368454"},{"key":"e_1_3_2_1_9_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs.CL]","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs.CL]"},{"key":"e_1_3_2_1_10_1","volume-title":"International Conference on Learning Representations (ICLR). https:\/\/openreview.net\/forum?id=HJxyZkBKDr","author":"Dong Xuanyi","year":"2020","unstructured":"Xuanyi Dong and Yi Yang. 2020. NAS-Bench-201: Extending the Scope of Reproducible Neural Architecture Search. In International Conference on Learning Representations (ICLR). https:\/\/openreview.net\/forum?id=HJxyZkBKDr"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378512"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446757"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3152247"},{"key":"e_1_3_2_1_14_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arXiv:1512.03385 [cs.CV]"},{"key":"e_1_3_2_1_15_1","volume-title":"Retrieved","year":"2021","unstructured":"HuggingFace. 2021. HuggingFace Model Hub. Retrieved March 20, 2023 from https:\/\/huggingface.co\/models?sort=downloads"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360575"},{"key":"e_1_3_2_1_17_1","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Lai Fan","year":"2023","unstructured":"Fan Lai, Yinwei Dai, Harsha V. Madhyastha, and Mosharaf Chowdhury. 2023. ModelKeeper: Accelerating DNN Training via Automated Training Warmup. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). USENIX Association, Boston, MA, 769--785. https:\/\/www.usenix.org\/conference\/nsdi23\/presentation\/lai-fan"},{"key":"e_1_3_2_1_18_1","volume-title":"2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Li Jie","year":"2022","unstructured":"Jie Li, Laiping Zhao, Yanan Yang, Kunlin Zhan, and Keqiu Li. 2022. Tetris: Memory-efficient Serverless Inference through Tensor Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). USENIX Association, Carlsbad, CA. https:\/\/www.usenix.org\/conference\/atc22\/presentation\/li-jie"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM53939.2023.10228916"},{"key":"e_1_3_2_1_20_1","volume-title":"Help Rather Than Recycle: Alleviating Cold Startup in Serverless Computing Through Inter-Function Container Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Li Zijun","year":"2022","unstructured":"Zijun Li, Linsong Guo, Quan Chen, Jiagan Cheng, Chuhao Xu, Deze Zeng, Zhuo Song, Tao Ma, Yong Yang, Chao Li, and Minyi Guo. 2022. Help Rather Than Recycle: Alleviating Cold Startup in Serverless Computing Through Inter-Function Container Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). USENIX Association, Carlsbad, CA, 69--84. https:\/\/www.usenix.org\/conference\/atc22\/presentation\/li-zijun-help"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3508360"},{"key":"e_1_3_2_1_22_1","volume-title":"Agile Cold Starts for Scalable Serverless. In 11th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 19)","author":"Mohan Anup","year":"2019","unstructured":"Anup Mohan, Harshad Sane, Kshitij Doshi, Saikrishna Edupuganti, Naren Nayak, and Vadim Sukhomlinov. 2019. Agile Cold Starts for Scalable Serverless. In 11th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 19). USENIX Association, Renton, WA. https:\/\/www.usenix.org\/conference\/hotcloud19\/presentation\/mohan"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389758"},{"key":"e_1_3_2_1_24_1","volume-title":"Retrieved","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. 2023. NVIDIA Container Toolkit. Retrieved March 20, 2023 from https:\/\/github.com\/NVIDIA\/nvidia-docker"},{"key":"e_1_3_2_1_25_1","volume-title":"Arpaci-Dusseau","author":"Oakes Edward","year":"2018","unstructured":"Edward Oakes, Leon Yang, Dennis Zhou, Kevin Houck, Tyler Harter, Andrea C. Arpaci-Dusseau, and Remzi H. Arpaci-Dusseau. 2018. SOCK: Rapid Task Provisioning with Serverless-Optimized Containers. In Proceedings of the 2018 USENIX Conference on Usenix Annual Technical Conference (Boston, MA, USA) (USENIX ATC '18). USENIX Association, USA, 57--69."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3380609"},{"key":"e_1_3_2_1_27_1","volume-title":"Retrieved","author":"Platform Google Cloud","year":"2021","unstructured":"Google Cloud Platform. 2021. Machine Learning on Google Cloud Platform. Retrieved March 20, 2023 from https:\/\/github.com\/GoogleCloudPlatform\/ml-on-gcp"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00045"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00045"},{"key":"e_1_3_2_1_30_1","volume-title":"Fast and Slow: Scalable Analytics on Serverless Infrastructure. In 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Pu Qifan","year":"2019","unstructured":"Qifan Pu, Shivaram Venkataraman, and Ion Stoica. 2019. Shuffling, Fast and Slow: Scalable Analytics on Serverless Infrastructure. In 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19). USENIX Association, Boston, MA, 193--206. https:\/\/www.usenix.org\/conference\/nsdi19\/presentation\/pu"},{"key":"e_1_3_2_1_31_1","volume-title":"Approximate graph edit distance computation by means of bipartite graph matching. Image and Vision computing 27, 7","author":"Riesen Kaspar","year":"2009","unstructured":"Kaspar Riesen and Horst Bunke. 2009. Approximate graph edit distance computation by means of bipartite graph matching. Image and Vision computing 27, 7 (2009), 950--959."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507750"},{"key":"e_1_3_2_1_33_1","volume-title":"Retrieved","author":"Services Amazon Web","year":"2022","unstructured":"Amazon Web Services. 2022. Machine learning inference at scale using AWS serverless. Retrieved March 20, 2023 from https:\/\/aws.amazon.com\/cn\/blogs\/machine-learning\/machine-learning-inference-at-scale-using-aws-serverless\/"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510611"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the 2020 USENIX Conference on Usenix Annual Technical Conference (USENIX ATC'20). USENIX Association, USA, Article 14","author":"Shahrad Mohammad","year":"2020","unstructured":"Mohammad Shahrad, Rodrigo Fonseca, \u00cd\u00f1igo Goiri, Gohar Chaudhry, Paul Batum, Jason Cooke, Eduardo Laureano, Colby Tresness, Mark Russinovich, and Ricardo Bianchini. 2020. Serverless in the Wild: Characterizing and Optimizing the Serverless Workload at a Large Cloud Provider. In Proceedings of the 2020 USENIX Conference on Usenix Annual Technical Conference (USENIX ATC'20). USENIX Association, USA, Article 14, 14 pages."},{"key":"e_1_3_2_1_36_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv:1409.1556 [cs.CV]"},{"key":"e_1_3_2_1_37_1","volume-title":"Retrieved","author":"S\u00e9mery Oleg","year":"2021","unstructured":"Oleg S\u00e9mery. 2021. Sandbox for training deep learning networks. Retrieved March 20, 2023 from https:\/\/github.com\/osmr\/imgclsmob"},{"key":"e_1_3_2_1_38_1","volume-title":"Retrieved","year":"2022","unstructured":"TensorFlow. 2022. Save and load Keras models. Retrieved March 20, 2023 from https:\/\/www.tensorflow.org\/guide\/keras\/save_and_serialize"},{"key":"e_1_3_2_1_39_1","volume-title":"\u0141 ukasz Kaiser, and Illia Polosukhin","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141 ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_1_40_1","volume-title":"Peeking Behind the Curtains of Serverless Platforms. In 2018 USENIX Annual Technical Conference (USENIX ATC 18)","author":"Wang Liang","year":"2018","unstructured":"Liang Wang, Mengyuan Li, Yinqian Zhang, Thomas Ristenpart, and Michael Swift. 2018. Peeking Behind the Curtains of Serverless Platforms. In 2018 USENIX Annual Technical Conference (USENIX ATC 18). USENIX Association, Boston, MA, 133--146. https:\/\/www.usenix.org\/conference\/atc18\/presentation\/wang-liang"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517905"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507709"},{"key":"e_1_3_2_1_43_1","volume-title":"SLO-Aware Machine Learning Inference Serving. In 2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Zhang Chengliang","year":"2019","unstructured":"Chengliang Zhang, Minchen Yu, Wei Wang, and Feng Yan. 2019. MArk: Exploiting Cloud Services for Cost-Effective, SLO-Aware Machine Learning Inference Serving. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). USENIX Association, Renton, WA, 1049--1062. https:\/\/www.usenix.org\/conference\/atc19\/presentation\/zhang-chengliang"},{"key":"e_1_3_2_1_44_1","volume-title":"Rodrigo Fonseca, Sameh Elnikety, Christina Delimitrou, and Ricardo Bianchini.","author":"Zhang Yanqi","year":"2021","unstructured":"Yanqi Zhang, \u00cd\u00f1igo Goiri, Gohar Irfan Chaudhry, Rodrigo Fonseca, Sameh Elnikety, Christina Delimitrou, and Ricardo Bianchini. 2021. Faster and Cheaper Serverless Computing on Harvested Resources. In Proceedings of the International Symposium on Operating Systems Principles (SOSP). ACM. https:\/\/www.microsoft.com\/en-us\/research\/publication\/faster-and-cheaper-serverless-computing-on-harvested-resources\/"}],"event":{"name":"EuroSys '24: Nineteenth European Conference on Computer Systems","location":"Athens Greece","acronym":"EuroSys '24","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Nineteenth European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3629567","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627703.3629567","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:13:36Z","timestamp":1755825216000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3629567"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,22]]},"references-count":44,"alternative-id":["10.1145\/3627703.3629567","10.1145\/3627703"],"URL":"https:\/\/doi.org\/10.1145\/3627703.3629567","relation":{},"subject":[],"published":{"date-parts":[[2024,4,22]]},"assertion":[{"value":"2024-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}