{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T07:58:14Z","timestamp":1783065494981,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":104,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["1931531"],"award-info":[{"award-number":["1931531"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2149389"],"award-info":[{"award-number":["2149389"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2122155"],"award-info":[{"award-number":["2122155"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,2]]},"DOI":"10.1145\/3652892.3700760","type":"proceedings-article","created":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T19:36:13Z","timestamp":1732736173000},"page":"211-224","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Towards SLO-Compliant and Cost-Effective Serverless Computing on Emerging GPU Architectures"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6326-0665","authenticated-orcid":false,"given":"Vivek M.","family":"Bhasi","sequence":"first","affiliation":[{"name":"The Pennsylvania State University, University Park, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5010-2641","authenticated-orcid":false,"given":"Aakash","family":"Sharma","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4017-2093","authenticated-orcid":false,"given":"Rishabh","family":"Jain","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, University Park, PA, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9607-0131","authenticated-orcid":false,"given":"Jashwant Raj","family":"Gunasekaran","sequence":"additional","affiliation":[{"name":"Adobe Research, Bangalore, Karnataka, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0367-5989","authenticated-orcid":false,"given":"Ashutosh","family":"Pattnaik","sequence":"additional","affiliation":[{"name":"Arm, Austin, TX, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9940-9951","authenticated-orcid":false,"given":"Mahmut Taylan","family":"Kandemir","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, University Park, PA, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4746-7578","authenticated-orcid":false,"given":"Chita","family":"Das","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, University Park, PA, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,12,2]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2020. Establishing Effective SLOs. https:\/\/www.datadoghq.com\/blog\/establishing-service-level-objectives\/."},{"key":"e_1_3_2_1_2_1","unstructured":"2020. Twitter Stream traces. https:\/\/archive.org\/details\/twitterstream. Accessed: 2020-05-07."},{"key":"e_1_3_2_1_3_1","unstructured":"2021. AWS Lambda Cold Starts. https:\/\/mikhail.io\/serverless\/coldstarts\/aws\/."},{"key":"e_1_3_2_1_4_1","unstructured":"2021. Azure Functions Cold Starts. https:\/\/mikhail.io\/serverless\/coldstarts\/azure\/."},{"key":"e_1_3_2_1_5_1","unstructured":"2022. The State of Serverless. https:\/\/www.datadoghq.com\/state-of-serverless\/."},{"key":"e_1_3_2_1_6_1","unstructured":"2024. AWS Lambda. https:\/\/aws.amazon.com\/lambda\/."},{"key":"e_1_3_2_1_7_1","unstructured":"2024. AWS Spot Instances. https:\/\/docs.aws.amazon.com\/AWSEC2\/latest\/UserGuide\/using-spot-instances.html."},{"key":"e_1_3_2_1_8_1","unstructured":"2024. Azure Spot Virtual Machines. https:\/\/learn.microsoft.com\/en-us\/azure\/virtual-machines\/spot-vms."},{"key":"e_1_3_2_1_9_1","unstructured":"2024. AzureML Inference Router. https:\/\/learn.microsoft.com\/en-us\/azure\/machine-learning\/how-to-kubernetes-inference-routing-azureml-fe."},{"key":"e_1_3_2_1_10_1","unstructured":"2024. Banana. https:\/\/docs.banana.dev\/banana-docs\/."},{"key":"e_1_3_2_1_11_1","unstructured":"2024. Banana Latency Guarantees. https:\/\/www.banana.dev\/."},{"key":"e_1_3_2_1_12_1","unstructured":"2024. Beam. https:\/\/www.beam.cloud\/."},{"key":"e_1_3_2_1_13_1","unstructured":"2024. Cerebrium. https:\/\/docs.cerebrium.ai\/introduction."},{"key":"e_1_3_2_1_14_1","unstructured":"2024. CUDA Concurrency Mechanisms. https:\/\/docs.nvidia.com\/datacenter\/tesla\/mig-user-guide\/#cuda-concurrency."},{"key":"e_1_3_2_1_15_1","unstructured":"2024. Docker Swarm. https:\/\/docs.docker.com\/engine\/swarm\/."},{"key":"e_1_3_2_1_16_1","unstructured":"2024. Google Cloud Functions. https:\/\/cloud.google.com\/functions."},{"key":"e_1_3_2_1_17_1","unstructured":"2024. Google Spot VMs. https:\/\/cloud.google.com\/compute\/docs\/instances\/preemptible."},{"key":"e_1_3_2_1_18_1","unstructured":"2024. GPUs vs CPUs for deployment of deep learning models. https:\/\/azure.microsoft.com\/en-us\/blog\/gpus-vs-cpus-for-deployment-of-deep-learning-models\/."},{"key":"e_1_3_2_1_19_1","unstructured":"2024. Microsoft Azure Serverless Functions. https:\/\/azure.microsoft.com\/en-us\/services\/functions\/."},{"key":"e_1_3_2_1_20_1","unstructured":"2024. NVIDIA A100 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data-center\/a100\/."},{"key":"e_1_3_2_1_21_1","unstructured":"2024. NVIDIA-Docker. https:\/\/github.com\/NVIDIA\/nvidia-docker."},{"key":"e_1_3_2_1_22_1","unstructured":"2024. NVIDIA Grace Hopper Superchip Architecture In-Depth. https:\/\/developer.nvidia.com\/blog\/nvidia-grace-hopper-superchip-architecture-in-depth\/."},{"key":"e_1_3_2_1_23_1","unstructured":"2024. NVIDIA H100 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data-center\/technologies\/hopper-architecture\/."},{"key":"e_1_3_2_1_24_1","unstructured":"2024. NVIDIA Multi-Instance GPU. https:\/\/docs.nvidia.com\/datacenter\/tesla\/mig-user-guide\/index.html."},{"key":"e_1_3_2_1_25_1","unstructured":"2024. NVIDIA Multi-Process Service. https:\/\/docs.nvidia.com\/deploy\/mps\/index.html."},{"key":"e_1_3_2_1_26_1","unstructured":"2024. NVIDIA-smi. https:\/\/developer.nvidia.com\/nvidia-system-management-interface."},{"key":"e_1_3_2_1_27_1","unstructured":"2024. PCIe Special Interest Group PCIe 6 Specification. https:\/\/pcisig.com\/pci-express-6.0-specification."},{"key":"e_1_3_2_1_28_1","unstructured":"2024. Replicate. https:\/\/replicate.com\/docs."},{"key":"e_1_3_2_1_29_1","unstructured":"2024. Runpod. https:\/\/www.runpod.io\/serverless-gpu."},{"key":"e_1_3_2_1_30_1","unstructured":"2024. Serverless Application Lens: Alexa Skills. https:\/\/docs.aws.amazon.com\/wellarchitected\/latest\/serverless-applications-lens\/alexa-skills.html."},{"key":"e_1_3_2_1_31_1","unstructured":"2024. Serverless Facebook Messenger Bot. https:\/\/github.com\/pmuens\/serverless-facebook-messenger-bot."},{"key":"e_1_3_2_1_32_1","unstructured":"2024. Serverless Optical Character Recognition (OCR) Tutorial. https:\/\/cloud.google.com\/functions\/docs\/tutorials\/ocr."},{"key":"e_1_3_2_1_33_1","unstructured":"2024. trainML. https:\/\/docs.trainml.ai\/."},{"key":"e_1_3_2_1_34_1","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Berger Daniel S.","year":"2018","unstructured":"Daniel S. Berger, Benjamin Berg, Timothy Zhu, Siddhartha Sen, and Mor Harchol-Balter. 2018. RobinHood: Tail Latency Aware Caching - Dynamic Reallocation from Cache-Rich to Cache-Poor. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, 195--212. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/berger"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563464"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486992"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00018"},{"key":"e_1_3_2_1_38_1","volume-title":"Firecracker: Lightweight Virtualization for Serverless Applications. In NSDI.","author":"Brooker Marc","year":"2020","unstructured":"Marc Brooker, Andreea Florescu, Diana-Maria Popa, Rolf Neugebauer, Alexandru Agache, Alexandra Iordache, Anthony Liguori, and Phil Piwonka. 2020. Firecracker: Lightweight Virtualization for Serverless Applications. In NSDI."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783710"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3093337.3037700"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","unstructured":"Yunpeng Chen Jianan Li Huaxin Xiao Xiaojie Jin Shuicheng Yan and Jiashi Feng. 2017. Dual Path Networks. 10.48550\/ARXIV.1707.01629","DOI":"10.48550\/ARXIV.1707.01629"},{"key":"e_1_3_2_1_42_1","volume-title":"Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Choi Seungbeom","year":"2022","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2022. Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). USENIX Association, Carlsbad, CA, 199--216. https:\/\/www.usenix.org\/conference\/atc22\/presentation\/choi-seungbeom"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071121"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2006.03236"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2408776.2408794"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/1294261.1294281"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1810.04805"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507732"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/PMBS56514.2022.00007"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00077"},{"key":"e_1_3_2_1_51_1","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Fouladi Sadjad","year":"2019","unstructured":"Sadjad Fouladi, Francisco Romero, Dan Iter, Qian Li, Shuvo Chatterjee, Christos Kozyrakis, Matei Zaharia, and Keith Winstein. 2019. From Laptop to Lambda: Outsourcing Everyday Jobs to Thousands of Transient Functional Containers. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). USENIX Association, Renton, WA, 475--488. http:\/\/www.usenix.org\/conference\/atc19\/presentation\/fouladi"},{"key":"e_1_3_2_1_52_1","volume-title":"Fast and Slow: Low-Latency Video Processing Using Thousands of Tiny Threads. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Fouladi Sadjad","year":"2017","unstructured":"Sadjad Fouladi, Riad S. Wahby, Brennan Shacklett, Karthikeyan Vasuki Balasubramaniam, William Zeng, Rahul Bhalerao, Anirudh Sivaraman, George Porter, and Keith Winstein. 2017. Encoding, Fast and Slow: Low-Latency Video Processing Using Thousands of Tiny Threads. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). USENIX Association, Boston, MA, 363--376. https:\/\/www.usenix.org\/conference\/nsdi17\/technical-sessions\/presentation\/fouladi"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575721"},{"key":"e_1_3_2_1_54_1","volume-title":"USENIX Middleware Conference.","author":"Gujarati Arpan","unstructured":"Arpan Gujarati, Sameh Elnikety, Yuxiong He, Kathryn S. McKinley, and Bj\u00f6rn B. Brandenburg. 2017. Swayam: Distributed Autoscaling to Meet SLAs of Machine Learning Inference Services with Resource Efficiency. In USENIX Middleware Conference."},{"key":"e_1_3_2_1_55_1","volume-title":"Cocktail: A Multidimensional Optimization for Model Serving in Cloud. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Gunasekaran Jashwant Raj","unstructured":"Jashwant Raj Gunasekaran, Cyan Subhra Mishra, Prashanth Thinakaran, Bikash Sharma, Mahmut Taylan Kandemir, and Chita R. Das. 2022. Cocktail: A Multidimensional Optimization for Model Serving in Cloud. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). USENIX Association, Renton, WA, 1041--1057. https:\/\/www.usenix.org\/conference\/nsdi22\/presentation\/gunasekaran"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Deep Residual Learning for Image Recognition. 10.48550\/ARXIV.1512.03385","DOI":"10.48550\/ARXIV.1512.03385"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","unstructured":"Pengcheng He Xiaodong Liu Jianfeng Gao and Weizhu Chen. 2020. DeBERTa: Decoding-enhanced BERT with Disentangled Attention. 10.48550\/ARXIV.2006.03654","DOI":"10.48550\/ARXIV.2006.03654"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","unstructured":"Andrew G. Howard Menglong Zhu Bo Chen Dmitry Kalenichenko Weijun Wang Tobias Weyand Marco Andreetto and Hartwig Adam. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. 10.48550\/ARXIV.1704.04861","DOI":"10.48550\/ARXIV.1704.04861"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","unstructured":"Jie Hu Li Shen Samuel Albanie Gang Sun and Enhua Wu. 2017. Squeeze-and-Excitation Networks. 10.48550\/ARXIV.1709.01507","DOI":"10.48550\/ARXIV.1709.01507"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1608.06993"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2006.11316"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563499"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589112"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"crossref","unstructured":"Ram Srivatsa Kannan Lavanya Subramanian Ashwin Raju Jeongseob Ahn Jason Mars and Lingjia Tang. 2019. GrandSLAm: Guaranteeing SLAs for Jobs in Microservices Execution Frameworks. In EuroSys.","DOI":"10.1145\/3302424.3303958"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00019"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/PDP2018.2018.00090"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-21708-5_18"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1909.11942"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","unstructured":"Hang Le Lo\u00efc Vial Jibril Frej Vincent Segonne Maximin Coavoux Benjamin Lecouteux Alexandre Allauzen Beno\u00eet Crabb\u00e9 Laurent Besacier and Didier Schwab. 2019. FlauBERT: Unsupervised Language Model Pre-training for French. 10.48550\/ARXIV.1912.05372","DOI":"10.48550\/ARXIV.1912.05372"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563510"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. 10.48550\/ARXIV.1907.11692","DOI":"10.48550\/ARXIV.1907.11692"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","unstructured":"Ningning Ma Xiangyu Zhang Hai-Tao Zheng and Jian Sun. 2018. ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design. 10.48550\/ARXIV.1807.11164","DOI":"10.48550\/ARXIV.1807.11164"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.5555\/2002472.2002491"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2020.01.004"},{"key":"e_1_3_2_1_76_1","volume-title":"Analysis and Exploitation of Dynamic Pricing in the Public Cloud for ML Training. VLDB DISPA Workshop 2020 ([n. d.]). https:\/\/par.nsf.gov\/biblio\/10213411","author":"Narayanan Deepak","unstructured":"Deepak Narayanan, Keshav Santhanam, Fiodar Kazhamiaka, Amar Phanishayee,, and Matei Zaharia. [n.d.]. Analysis and Exploitation of Dynamic Pricing in the Public Cloud for ML Training. VLDB DISPA Workshop 2020 ([n. d.]). https:\/\/par.nsf.gov\/biblio\/10213411"},{"key":"e_1_3_2_1_77_1","volume-title":"Garnett (Eds.)","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/bdbca288fee7f92f2bfa9f7012727740-Paper.pdf"},{"key":"e_1_3_2_1_78_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_79_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.3390\/app11041438"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486972"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","unstructured":"Mark Sandler Andrew Howard Menglong Zhu Andrey Zhmoginov and Liang-Chieh Chen. 2018. MobileNetV2: Inverted Residuals and Linear Bottlenecks. (2018). 10.48550\/ARXIV.1801.04381","DOI":"10.48550\/ARXIV.1801.04381"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","unstructured":"Victor Sanh Lysandre Debut Julien Chaumond and Thomas Wolf. 2019. DistilBERT a distilled version of BERT: smaller faster cheaper and lighter. 10.48550\/ARXIV.1910.01108","DOI":"10.48550\/ARXIV.1910.01108"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/3452413.3464785"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3406011"},{"key":"e_1_3_2_1_88_1","volume-title":"Subrata Mitra, Mahmut Taylan Kandemir, George Kesidis, and Chita R. Das.","author":"Sharma Aakash","year":"2022","unstructured":"Aakash Sharma, Vivek M. Bhasi, Sonali Singh, Rishabh Jain, Jashwant Raj Gunasekaran, Subrata Mitra, Mahmut Taylan Kandemir, George Kesidis, and Chita R. Das. 2022. Analysis of Distributed Deep Learning in the Cloud. arXiv:2208.14344 [cs.LG] https:\/\/arxiv.org\/abs\/2208.14344"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS57875.2023.00023"},{"key":"e_1_3_2_1_90_1","volume-title":"Das","author":"Sharma Aakash","year":"2024","unstructured":"Aakash Sharma, Vivek M. Bhasi, Sonali Singh, George Kesidis, Mahmut T. Kandemir, and Chita R. Das. 2024. GPU Cluster Scheduling for Network-Sensitive Deep Learning. arXiv:2401.16492 [cs.PF] https:\/\/arxiv.org\/abs\/2401.16492"},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. 10.48550\/ARXIV.1409.1556","DOI":"10.48550\/ARXIV.1409.1556"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00039"},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED52811.2021.9502506"},{"key":"e_1_3_2_1_95_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486981"},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","unstructured":"Christian Szegedy Wei Liu Yangqing Jia Pierre Sermanet Scott Reed Dragomir Anguelov Dumitru Erhan Vincent Vanhoucke and Andrew Rabinovich. 2014. Going Deeper with Convolutions. 10.48550\/ARXIV.1409.4842","DOI":"10.48550\/ARXIV.1409.4842"},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1905.11946"},{"key":"e_1_3_2_1_98_1","volume-title":"Wikipedia workload analysis for decentralized hosting. Computer Networks","author":"Urdaneta Guido","year":"2009","unstructured":"Guido Urdaneta, Guillaume Pierre, and Maarten Van Steen. 2009. Wikipedia workload analysis for decentralized hosting. Computer Networks (2009)."},{"key":"e_1_3_2_1_99_1","unstructured":"Liang Wang Mengyuan Li Yinqian Zhang Thomas Ristenpart and Michael Swift. 2018. Peeking Behind the Curtains of Serverless Platforms. In ATC."},{"key":"e_1_3_2_1_100_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507709"},{"key":"e_1_3_2_1_101_1","volume-title":"A survey of multi-tenant deep learning inference on GPU. arXiv preprint arXiv:2203.09040","author":"Yu Fuxun","year":"2022","unstructured":"Fuxun Yu, Di Wang, Longfei Shangguan, Minjia Zhang, Chenchen Liu, and Xiang Chen. 2022. A survey of multi-tenant deep learning inference on GPU. arXiv preprint arXiv:2203.09040 (2022)."},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","unstructured":"Fisher Yu Dequan Wang Evan Shelhamer and Trevor Darrell. 2017. Deep Layer Aggregation. 10.48550\/ARXIV.1707.06484","DOI":"10.48550\/ARXIV.1707.06484"},{"key":"e_1_3_2_1_103_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477132.3483580"},{"key":"e_1_3_2_1_104_1","doi-asserted-by":"publisher","DOI":"10.1145\/3567955.3567960"}],"event":{"name":"Middleware '24: 25th International Middleware Conference","location":"Hong Kong Hong Kong","acronym":"Middleware '24","sponsor":["IFIP","Usenix"]},"container-title":["Proceedings of the 25th International Middleware Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652892.3700760","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652892.3700760","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652892.3700760","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:53:57Z","timestamp":1750287237000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652892.3700760"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"references-count":104,"alternative-id":["10.1145\/3652892.3700760","10.1145\/3652892"],"URL":"https:\/\/doi.org\/10.1145\/3652892.3700760","relation":{},"subject":[],"published":{"date-parts":[[2024,12,2]]},"assertion":[{"value":"2024-12-02","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}