{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:53:13Z","timestamp":1773193993303,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,30]],"date-time":"2023-10-30T00:00:00Z","timestamp":1698624000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key Research & Development (R&D) Plan","award":["2022YFB4501703"],"award-info":[{"award-number":["2022YFB4501703"]}]},{"name":"Major Key Project of PCL","award":["PCL2022A05"],"award-info":[{"award-number":["PCL2022A05"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,30]]},"DOI":"10.1145\/3620678.3624664","type":"proceedings-article","created":{"date-parts":[[2023,10,31]],"date-time":"2023-10-31T13:58:07Z","timestamp":1698760687000},"page":"324-340","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":35,"title":["AsyFunc"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8870-4309","authenticated-orcid":false,"given":"Qiangyu","family":"Pei","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1960-5662","authenticated-orcid":false,"given":"Yongjie","family":"Yuan","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7543-0180","authenticated-orcid":false,"given":"Haichuan","family":"Hu","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9606-4492","authenticated-orcid":false,"given":"Qiong","family":"Chen","sequence":"additional","affiliation":[{"name":"Huawei, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8570-1345","authenticated-orcid":false,"given":"Fangming","family":"Liu","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, Huazhong University of Science and Technology, Wuhan, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,31]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the 17th USENIX Symposium on Networked Systems Design and Implementation. 419--434","author":"Agache Alexandru","year":"2020","unstructured":"Alexandru Agache, Marc Brooker, Alexandra Iordache, Anthony Liguori, Rolf Neugebauer, Phil Piwonka, and Diana-Maria Popa. 2020. Firecracker: Lightweight virtualization for serverless applications. In Proceedings of the 17th USENIX Symposium on Networked Systems Design and Implementation. 419--434."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"e_1_3_2_1_3_1","unstructured":"Archive Team. [n. d.]. Archive Team: The Twitter Stream Grab. https:\/\/archive.org\/details\/twitterstream[Online Accessed 28-Sept-2023]."},{"key":"e_1_3_2_1_4_1","unstructured":"AWS. [n.d.]. Alexa skills. https:\/\/docs.aws.amazon.com\/wellarchitected\/latest\/serverless-applications-lens\/alexa-skills.html[Online Accessed 28-Sept-2023]."},{"key":"e_1_3_2_1_5_1","unstructured":"AWS. [n. d.]. Amazon SageMaker. https:\/\/aws.amazon.com\/sagemaker\/[Online Accessed 28-Sept-2023]."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486992"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E.2019.00-10"},{"key":"e_1_3_2_1_8_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language models are few-shot learners. Advances in Neural Information Processing Systems 33 (2020), 1877--1901.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/300979.300985"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the 14th USENIX Symposium on Networked Systems Design and Implementation. 613--627","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J Franklin, Joseph E Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency Online Prediction Serving System. In Proceedings of the 14th USENIX Symposium on Networked Systems Design and Implementation. 613--627."},{"key":"e_1_3_2_1_11_1","volume-title":"DVABatch: Diversity-aware Multi-Entry Multi-Exit Batching for Efficient Processing of DNN Services on GPUs. In 2022 USENIX Annual Technical Conference. 183--198","author":"Cui Weihao","year":"2022","unstructured":"Weihao Cui, Han Zhao, Quan Chen, Hao Wei, Zirui Li, Deze Zeng, Chao Li, and Minyi Guo. 2022. DVABatch: Diversity-aware Multi-Entry Multi-Exit Batching for Efficient Processing of DNN Services on GPUs. In 2022 USENIX Annual Technical Conference. 183--198."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3423211.3425690"},{"key":"e_1_3_2_1_13_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"e_1_3_2_1_15_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378512"},{"key":"e_1_3_2_1_17_1","unstructured":"Anirudh Garg. [n. d.]. Why use Azure Functions for ML inference? https:\/\/techcommunity.microsoft.com\/t5\/apps-on-azure-blog\/why-use-azure-functions-for-ml-inference\/ba-p\/1416728."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3135974.3135993"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation. 443--462","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like Clockwork: Performance Predictability from the Bottom Up. In Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation. 443--462."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749472"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_22_1","unstructured":"Knative. [n. d.]. Knative is an Open-Source Enterprise-level solution to build Serverless and Event Driven Applications. https:\/\/knative.dev\/docs\/[Online Accessed 28-Sept-2023]."},{"key":"e_1_3_2_1_23_1","unstructured":"Oleksiy Kovyrin. [n. d.]. Make Data Useful by Greg Linden. https:\/\/www.scribd.com\/doc\/4970486\/[Online Accessed 28-Sept-2023]."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387547"},{"key":"e_1_3_2_1_25_1","volume-title":"2022 USENIX Annual Technical Conference.","author":"Li Jie","year":"2022","unstructured":"Jie Li, Laiping Zhao, Yanan Yang, Kunlin Zhan, and Keqiu Li. 2022. Tetris: Memory-efficient Serverless Inference through Tensor Sharing. In 2022 USENIX Annual Technical Conference."},{"key":"e_1_3_2_1_26_1","volume-title":"RunD: A Lightweight Secure Container Runtime for High-density Deployment and High-concurrency Startup in Serverless Computing. In 2022 USENIX Annual Technical Conference. 53--68","author":"Li Zijun","year":"2022","unstructured":"Zijun Li, Jiagan Cheng, Quan Chen, Eryu Guan, Zizheng Bian, Yi Tao, Bin Zha, Qiang Wang, Weidong Han, and Minyi Guo. 2022. RunD: A Lightweight Secure Container Runtime for High-density Deployment and High-concurrency Startup in Serverless Computing. In 2022 USENIX Annual Technical Conference. 53--68."},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the 17th USENIX Symposium on Operating Systems Design and Implementation. 663--679","author":"Li Zhuohan","year":"2023","unstructured":"Zhuohan Li, Lianmin Zheng, Yinmin Zhong, Vincent Liu, Ying Sheng, Xin Jin, Yanping Huang, Zhifeng Chen, Hao Zhang, Joseph E Gonzalez, and Ion Stoica. 2023. AlpaServe: Statistical Multiplexing with Model Parallelism for Deep Learning Serving. In Proceedings of the 17th USENIX Symposium on Operating Systems Design and Implementation. 663--679."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the International Conference on Cloud Computing. Springer, 45--60","author":"Lin Yanying","year":"2021","unstructured":"Yanying Lin, Kejiang Ye, Yongkang Li, Peng Lin, Yingfei Tang, and Chengzhong Xu. 2021. BBServerless: A Bursty Traffic Benchmark for Serverless. In Proceedings of the International Conference on Cloud Computing. Springer, 45--60."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507752"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/1103050.1103058"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2020.2974843"},{"key":"e_1_3_2_1_33_1","volume-title":"Docker: Lightweight Linux Containers for Consistent Development and Deployment. Linux J.","author":"Merkel Dirk","year":"2014","unstructured":"Dirk Merkel. 2014. Docker: Lightweight Linux Containers for Consistent Development and Deployment. Linux J. 2014, 239, Article 2 (mar 2014)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366623.3368133"},{"key":"e_1_3_2_1_35_1","volume-title":"2018 USENIX Annual Technical Conference. 57--70","author":"Oakes Edward","year":"2018","unstructured":"Edward Oakes, Leon Yang, Dennis Zhou, Kevin Houck, Tyler Harter, Andrea Arpaci-Dusseau, and Remzi Arpaci-Dusseau. 2018. SOCK: Rapid task provisioning with Serverless-Optimized containers. In 2018 USENIX Annual Technical Conference. 57--70."},{"key":"e_1_3_2_1_36_1","unstructured":"OpenAI. [n. d.]. Introducing ChatGPT. https:\/\/openai.com\/blog\/chatgpt[Online Accessed 28-Sept-2023]."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796705"},{"key":"e_1_3_2_1_38_1","volume-title":"2020 USENIX Annual Technical Conference. 813--827","author":"Papagiannis Anastasios","year":"2020","unstructured":"Anastasios Papagiannis, Giorgos Xanthakis, Giorgos Saloustros, Manolis Marazakis, and Angelos Bilas. 2020. Optimizing Memory-mapped I\/O for Fast Storage Devices. In 2020 USENIX Annual Technical Conference. 813--827."},{"key":"e_1_3_2_1_39_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. Advances in Neural Information Processing Systems 32."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507713"},{"key":"e_1_3_2_1_41_1","unstructured":"Pytorch. [n. d.]. Saving and Loading Models. https:\/\/pytorch.org\/tutorials\/beginner\/saving_loading_models.html[Online Accessed 28-Sept-2023]."},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the 15th USENIX Symposium on Operating Systems Design and Implementation.","author":"Qiao Aurick","year":"2021","unstructured":"Aurick Qiao, Sang Keun Choe, Suhas Jayaram Subramanya, Willie Neiswanger, Qirong Ho, Hao Zhang, Gregory R Ganger, and Eric P Xing. 2021. Pollux: Co-adaptive cluster scheduling for goodput-optimized deep learning. In Proceedings of the 15th USENIX Symposium on Operating Systems Design and Implementation."},{"key":"e_1_3_2_1_43_1","volume-title":"Real-Time Flying Object Detection with YOLOv8. arXiv preprint arXiv:2305.09972","author":"Reis Dillon","year":"2023","unstructured":"Dillon Reis, Jordan Kupec, Jacqueline Hong, and Ahmad Daoudi. 2023. Real-Time Flying Object Detection with YOLOv8. arXiv preprint arXiv:2305.09972 (2023)."},{"key":"e_1_3_2_1_44_1","unstructured":"Mariliis Retter. [n. d.]. Serverless Case Study -- Netflix. https:\/\/dashbird.io\/blog\/serverless-case-study-netflix\/[Online Accessed 28-Sept-2023]."},{"key":"e_1_3_2_1_45_1","volume-title":"INFaaS: Automated Model-less Inference Serving. In 2021 USENIX Annual Technical Conference. 397--411","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J Yadwadkar, and Christos Kozyrakis. 2021. INFaaS: Automated Model-less Inference Serving. In 2021 USENIX Annual Technical Conference. 397--411."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507750"},{"key":"e_1_3_2_1_47_1","volume-title":"2020 USENIX Annual Technical Conference. 205--218","author":"Shahrad Mohammad","year":"2020","unstructured":"Mohammad Shahrad, Rodrigo Fonseca, \u00cd\u00f1igo Goiri, Gohar Chaudhry, Paul Batum, Jason Cooke, Eduardo Laureano, Colby Tresness, Mark Russinovich, and Ricardo Bianchini. 2020. Serverless in the wild: Characterizing and optimizing the serverless workload at a large cloud provider. In 2020 USENIX Annual Technical Conference. 205--218."},{"key":"e_1_3_2_1_48_1","volume-title":"2020 USENIX Annual Technical Conference. 419--433","author":"Shillaker Simon","year":"2020","unstructured":"Simon Shillaker and Peter Pietzuch. 2020. Faasm: Lightweight isolation for efficient stateful serverless computing. In 2020 USENIX Annual Technical Conference. 419--433."},{"key":"e_1_3_2_1_49_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings of the autumn 1990 EUUG Conference. 241--248","author":"Snyder Peter","year":"1990","unstructured":"Peter Snyder. 1990. tmpfs: A virtual memory file system. In Proceedings of the autumn 1990 EUUG Conference. 241--248."},{"key":"e_1_3_2_1_51_1","first-page":"2818","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, title=Rethinking the Inception Architecture for Computer Vision, year=2016","author":"Szegedy Christian","unstructured":"Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jon Shlens, and Zbigniew Wojna. [n. d.]. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, title=Rethinking the Inception Architecture for Computer Vision, year=2016, pages=2818-2826."},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of the International Conference on Machine Learning. PMLR, 6105--6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In Proceedings of the International Conference on Machine Learning. PMLR, 6105--6114."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410279"},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of the Fourteenth EuroSys Conference. 1--16","author":"Amy Wang Kai-Ting","year":"2019","unstructured":"Kai-Ting Amy Wang, Rayson Ho, and Peng Wu. 2019. Replayable execution optimized for page sharing for a managed runtime environment. In Proceedings of the Fourteenth EuroSys Conference. 1--16."},{"key":"e_1_3_2_1_55_1","volume-title":"Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation. 945--960","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation. 945--960."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3195664"},{"key":"e_1_3_2_1_57_1","volume-title":"Guoyu Hu, Meihui Zhang, Yeow Meng Chee, and Beng Chin Ooi.","author":"Wu Yuncheng","year":"2021","unstructured":"Yuncheng Wu, Tien Tuan Anh Dinh, Guoyu Hu, Meihui Zhang, Yeow Meng Chee, and Beng Chin Ooi. 2021. Serverless Data Science-Are We There Yet? A Case Study of Model Serving. arXiv e-prints (2021), arXiv--2103."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3487552.3487815"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507709"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS51616.2021.00022"},{"key":"e_1_3_2_1_62_1","volume-title":"SLO-Aware Machine Learning Inference Serving. In 2019 USENIX Annual Technical Conference. 1049--1062","author":"Zhang Chengliang","year":"2019","unstructured":"Chengliang Zhang, Minchen Yu, Wei Wang, and Feng Yan. 2019. MArk: Exploiting Cloud Services for Cost-Effective, SLO-Aware Machine Learning Inference Serving. In 2019 USENIX Annual Technical Conference. 1049--1062."}],"event":{"name":"SoCC '23: ACM Symposium on Cloud Computing","location":"Santa Cruz CA USA","acronym":"SoCC '23","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 2023 ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620678.3624664","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620678.3624664","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:55:26Z","timestamp":1755878126000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620678.3624664"}},"subtitle":["A High-Performance and Resource-Efficient Serverless Inference System via Asymmetric Functions"],"short-title":[],"issued":{"date-parts":[[2023,10,30]]},"references-count":62,"alternative-id":["10.1145\/3620678.3624664","10.1145\/3620678"],"URL":"https:\/\/doi.org\/10.1145\/3620678.3624664","relation":{},"subject":[],"published":{"date-parts":[[2023,10,30]]},"assertion":[{"value":"2023-10-31","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}