{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T07:11:58Z","timestamp":1779174718841,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":84,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,2]]},"DOI":"10.1145\/3652892.3700768","type":"proceedings-article","created":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T19:36:13Z","timestamp":1732736173000},"page":"313-326","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Guardian: Safe GPU Sharing in Multi-Tenant Environments"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8341-3083","authenticated-orcid":false,"given":"Manos","family":"Pavlidakis","sequence":"first","affiliation":[{"name":"CARV, Foundation for Research and Technology - Hellas (FORTH), Institute of Computer Science (ICS, Heraklion, GR"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5470-4714","authenticated-orcid":false,"given":"Giorgos","family":"Vasiliadis","sequence":"additional","affiliation":[{"name":"Department of Management Science and Technology, Hellenic Mediterranean University, Heraklion, GR"},{"name":"Foundation for Research and Technology - Hellas (FORTH), Institute of Computer Science (ICS), Heraklion, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9857-9185","authenticated-orcid":false,"given":"Stelios","family":"Mavridis","sequence":"additional","affiliation":[{"name":"Foundation for Research and Technology - Hellas (FORTH), Institute of Computer Science (ICS), Heraklion, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5129-6684","authenticated-orcid":false,"given":"Anargyros","family":"Argyros","sequence":"additional","affiliation":[{"name":"Foundation for Research and Technology - Hellas (FORTH), Institute of Computer Science (ICS), Heraklion, Greece"},{"name":"? Department of Computer Science, University of Crete, Heraklion, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4729-7396","authenticated-orcid":false,"given":"Antony","family":"Chazapis","sequence":"additional","affiliation":[{"name":"Foundation for Research and Technology - Hellas (FORTH), Institute of Computer Science (ICS), Heraklion, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2975-4124","authenticated-orcid":false,"given":"Angelos","family":"Bilas","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Crete, Heraklion, GR"},{"name":"Foundation for Research and Technology - Hellas (FORTH), Institute of Computer Science (ICS), Heraklion, Greece"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,12,2]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"USENIX Security '09","author":"Akritidis Periklis","year":"2009","unstructured":"Periklis Akritidis, Manuel Costa, Miguel Castro, and Steven Hand. 2009. Baggy Bounds Checking: An Efficient and Backwards-Compatible Defense against Out-of-Bounds Errors. In USENIX Security '09."},{"key":"e_1_3_2_1_2_1","unstructured":"Michael Andersch Greg Palmer Ronny Krashinsky Nick Stam Vishal Mehta Gonzalo Brito and Sridhar Ramaswamy. 2022. Nvidia Hopper architecture in-depth. https:\/\/developer.nvidia.com\/blog\/nvidia-hopper-architecture-in-depth\/"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2019.8916466"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.38"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123975"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173169"},{"key":"e_1_3_2_1_7_1","unstructured":"M Bari L Stoltzfus P Lin C Liao M Emani and B Chapman. 2018. Is Data Placement Optimization Still Relevant On Newer GPUs?. In U.S. Department of Energy Office of Scientific and Technical Information."},{"key":"e_1_3_2_1_8_1","volume-title":"Balancing Efficiency and Fairness in Heterogeneous GPU Clusters for Deep Learning. In EuroSys '20","author":"Chaudhary Shubham","unstructured":"Shubham Chaudhary, Ramachandran Ramjee, Muthian Sivathanu, N. Kwatra, and S. Viswanatha. 2020. Balancing Efficiency and Fairness in Heterogeneous GPU Clusters for Deep Learning. In EuroSys '20."},{"key":"e_1_3_2_1_9_1","volume-title":"Rodinia: A Benchmark Suite for Heterogeneous Computing. In IISWC '09","author":"Che Shuai","year":"2009","unstructured":"Shuai Che, Michael Boyer, Jiayuan Meng, David Tarjan, Jeremy W. Sheaffer, Sang-Ha Lee, and Kevin Skadron. 2009. Rodinia: A Benchmark Suite for Heterogeneous Computing. In IISWC '09."},{"key":"e_1_3_2_1_10_1","volume-title":"Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In USENIX ATC '22","author":"Choi Seungbeom","year":"2022","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2022. Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In USENIX ATC '22."},{"key":"e_1_3_2_1_11_1","volume-title":"SoCC '20","author":"Dhakal Aditya","unstructured":"Aditya Dhakal, Sameer G Kulkarni, and K. K. Ramakrishnan. 2020. GSLICE: Controlled Spatial Sharing of GPUs for a Scalable Inference Platform. In SoCC '20."},{"key":"e_1_3_2_1_12_1","unstructured":"Bang Di Jianhua Sun and Hao Chen. 2016. A Study of Overflow Vulnerabilities on GPUs. In NPC' 16."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3243176.3243194"},{"key":"e_1_3_2_1_14_1","volume-title":"HiPC '11","author":"Duato Jose","unstructured":"Jose Duato, Antonio J. Pena, Federico Silla, Juan C. Fernandez, Rafael Mayo, and Enrique S. Quintana-Orti. 2011. Enabling CUDA acceleration within virtual machines using rCUDA. In HiPC '11."},{"key":"e_1_3_2_1_15_1","volume-title":"Cricket: A virtualization layer for distributed execution of CUDA applications with checkpoint\/restart support. In Concurrency and Computation: Practice and Experience.","author":"Eiling Niklas","year":"2022","unstructured":"Niklas Eiling, Jonas Baude, Stefan Lankes, and Antonello Monti. 2022. Cricket: A virtualization layer for distributed execution of CUDA applications with checkpoint\/restart support. In Concurrency and Computation: Practice and Experience."},{"key":"e_1_3_2_1_16_1","volume-title":"CGO '17","author":"Erb Christopher","unstructured":"Christopher Erb, Mike Collins, and Joseph L. Greathouse. 2017. Dynamic buffer overflow detection for GPGPUs. In CGO '17."},{"key":"e_1_3_2_1_17_1","unstructured":"Mart\u00edn Abadi et. al. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org."},{"key":"e_1_3_2_1_18_1","volume-title":"Rossbach","author":"Fingler Henrique","year":"2022","unstructured":"Henrique Fingler, Zhiting Zhu, Esther Yoon, Zhipeng Jia, Emmett Witchel, and Christopher J. Rossbach. 2022. DGSF: Disaggregated GPUs for Serverless Functions. In IPDPS '22."},{"key":"e_1_3_2_1_19_1","volume-title":"Dally","author":"Gebhart Mark","year":"2012","unstructured":"Mark Gebhart, Stephen W. Keckler, Brucek Khailany, Ronny Krashinsky, and William J. Dally. 2012. Unifying Primary Cache, Scratch, and Register File Memories in a Throughput Processor. In MICRO '12."},{"key":"e_1_3_2_1_20_1","volume-title":"MICRO '20","author":"Ghodrati Soroush","year":"2020","unstructured":"Soroush Ghodrati, Byung Hoon Ahn, Joon Kyung Kim, Sean Kinzer, Brahmendra Reddy Yatham, Navateja Alla, Hardik Sharma, Mohammad Alian, Eiman Ebrahimi, Nam Sung Kim, et al. 2020. Planaria: Dynamic architecture fission for spatial multi-tenant acceleration of deep neural networks. In MICRO '20."},{"key":"e_1_3_2_1_21_1","volume-title":"Demystifying the Placement Policies of the NVIDIA GPU Thread Block Scheduler for Concurrent Kernels. In SIGMETRICS '21","author":"Gilman Guin","unstructured":"Guin Gilman, Samuel S. Ogden, Tian Guo, and Robert J. Walls. 2021. Demystifying the Placement Policies of the NVIDIA GPU Thread Block Scheduler for Concurrent Kernels. In SIGMETRICS '21."},{"key":"e_1_3_2_1_22_1","volume-title":"GTC '12","author":"Grover Vinod","year":"2012","unstructured":"Vinod Grover and Yuan Lin. 2012. Compiling CUDA and other languages for GPUs. In GTC '12."},{"key":"e_1_3_2_1_23_1","volume-title":"Microsecond-scale Preemption for Concurrent GPU-accelerated DNN Inferences. In OSDI '22","author":"Han Mingcong","year":"2022","unstructured":"Mingcong Han, Hanze Zhang, Rong Chen, and Haibo Chen. 2022. Microsecond-scale Preemption for Concurrent GPU-accelerated DNN Inferences. In OSDI '22."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_25_1","volume-title":"Dissecting the NVIDIA volta GPU architecture via microbenchmarking. arXiv preprint arXiv:1804.06826","author":"Jia Zhe","year":"2018","unstructured":"Zhe Jia, Marco Maggioni, Benjamin Staiger, and Daniele P Scarpazza. 2018. Dissecting the NVIDIA volta GPU architecture via microbenchmarking. arXiv preprint arXiv:1804.06826 (2018)."},{"key":"e_1_3_2_1_26_1","unstructured":"Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_1_27_1","volume-title":"EuroSys '18","author":"Kroes Taddeus","unstructured":"Taddeus Kroes, Koen Koning, Erik van der Kouwe, Herbert Bos, and Cristiano Giuffrida. 2018. Delta pointers: Buffer overflow checks without the checks. In EuroSys '18."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387547"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527420"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2021.12.016"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Matthew Leinhauser Jeffrey Young Sergei Bastrakov Rene Widera Ronnie Chatterjee and Sunita Chandrasekaran. 2021. Performance Analysis of PIConGPU: Particle-in-Cell on GPUs using NVIDIA's NSight Systems and NSight Compute. In OSTI.GOV.","DOI":"10.2172\/1761619"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563510"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2015.7054184"},{"key":"e_1_3_2_1_35_1","volume-title":"Zico: Efficient GPU Memory Sharing for Concurrent DNN Training. In USENIX ATC '21","author":"Lim Gangmuk","year":"2021","unstructured":"Gangmuk Lim, Jeongseob Ahn, Wencong Xiao, Youngjin Kwon, and Myeongjae Jeon. 2021. Zico: Efficient GPU Memory Sharing for Concurrent DNN Training. In USENIX ATC '21."},{"key":"e_1_3_2_1_36_1","volume-title":"Honeycomb: Secure and Efficient GPU Executions via Static Validation. In OSDI 23.","author":"Mai HaoHui","year":"2023","unstructured":"HaoHui Mai, Jiacheng Zhao, Hongren Zheng, Yiyang Zhao, Zibin Liu, Mingyu Gao, Cong Wang, Huimin Cui, Xiaobing Feng, and Christos Kozyrakis. 2023. Honeycomb: Secure and Efficient GPU Executions via Static Validation. In OSDI 23."},{"key":"e_1_3_2_1_37_1","volume-title":"Gallatin: A General-Purpose GPU Memory Manager. In PPoPP '24","author":"Mccoy Hunter","year":"2024","unstructured":"Hunter Mccoy and Prashant Pandey. 2024. Gallatin: A General-Purpose GPU Memory Manager. In PPoPP '24."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Andrea Miele. 2015. Buffer overflow vulnerabilities in CUDA: a preliminary analysis. In Journal of Computer Virology and Hacking Techniques.","DOI":"10.1007\/s11416-015-0251-1"},{"key":"e_1_3_2_1_39_1","volume-title":"Memory Performance Attacks: Denial of Memory Service in Multi-Core Systems. In USENIX Security '07","author":"Moscibroda Thomas","year":"2007","unstructured":"Thomas Moscibroda and Onur Mutlu. 2007. Memory Performance Attacks: Denial of Memory Service in Multi-Core Systems. In USENIX Security '07."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Diana M. Naranjo Sebasti\u00e1n Risco Carlos de Alfonso Alfonso P\u00e9rez Ignacio Blanquer and Germ\u00e1n Molt\u00f3. 2020. Accelerated serverless computing based on GPU virtualization. J. Parallel and Distrib. Comput.","DOI":"10.1016\/j.jpdc.2020.01.004"},{"key":"e_1_3_2_1_41_1","volume-title":"CUDA Binary Utilities. Retrieved","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. 2021. CUDA Binary Utilities. Retrieved May 2023 from https:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_Binary_Utilities.pdf"},{"key":"e_1_3_2_1_42_1","volume-title":"Retrieved","author":"LibrarySample NVIDIA.","year":"2023","unstructured":"NVIDIA. 2022. CUDALibrarySample. Retrieved April 2023 from https:\/\/github.com\/NVIDIA\/CUDALibrarySamples\/tree\/master\/"},{"key":"e_1_3_2_1_43_1","volume-title":"Retrieved","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. 2022. Multi-Instance GPU. Retrieved April 2023 from https:\/\/docs.nvidia.com\/datacenter\/tesla\/pdf\/NVIDIA_MIG_User_Guide.pdf"},{"key":"e_1_3_2_1_44_1","volume-title":"Retrieved","author":"Multi-Process Service NVIDIA.","year":"2023","unstructured":"NVIDIA. 2022. Multi-Process Service. Retrieved May 2023 from https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf"},{"key":"e_1_3_2_1_45_1","volume-title":"Parallel Thread Execution ISA. Retrieved","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. 2023. Parallel Thread Execution ISA. Retrieved May 2023 from https:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/index.html"},{"key":"e_1_3_2_1_46_1","unstructured":"Meni Orenbach and Mark Silberstein. 2016. Enclaves as accelerators: learning lessons from gpu computing for designing efficient runtimes for enclaves. In ArXiv."},{"key":"e_1_3_2_1_47_1","volume-title":"Improving GPGPU Concurrency with Elastic Kernels. In ASPLOS '13","author":"Pai Sreepathi","unstructured":"Sreepathi Pai, Matthew J. Thazhuthaveetil, and R. Govindarajan. 2013. Improving GPGPU Concurrency with Elastic Kernels. In ASPLOS '13."},{"key":"e_1_3_2_1_48_1","volume-title":"Dynamic Resource Management for Efficient Utilization of Multitasking GPUs. In ASPLOS '17","author":"Kyu Park Jason Jong","year":"2017","unstructured":"Jason Jong Kyu Park, Yongjun Park, and Scott Mahlke. 2017. Dynamic Resource Management for Efficient Utilization of Multitasking GPUs. In ASPLOS '17."},{"key":"e_1_3_2_1_49_1","volume-title":"Sang Kil Cha, and Hyunsoo Yoon","author":"Park Sang-Ok","year":"2021","unstructured":"Sang-Ok Park, Ohmin Kwon, Yonggon Kim, Sang Kil Cha, and Hyunsoo Yoon. 2021. Mind Control Attack: Undermining Deep Learning with GPU Memory Exploitation. In Computers and Security."},{"key":"e_1_3_2_1_50_1","unstructured":"Alberto Parravicini Davide B. Bartolini Lukas Stadler Arnaud Delamare Marco Arnaboldi and Marco Domenico Santambrogio. 2015. Automated GPU Out-of-Bound Access Detection and Prevention in a Managed Environment. In ArXiv."},{"key":"e_1_3_2_1_51_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563467"},{"key":"e_1_3_2_1_53_1","volume-title":"TReM: A Task Revocation Mechanism for GPUs. In HPCC '20","author":"Pavlidakis Manos","year":"2020","unstructured":"Manos Pavlidakis, Stelios Mavridis, Nikos Chrysos, and Angelos Bilas. 2020. TReM: A Task Revocation Mechanism for GPUs. In HPCC '20."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541942"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2801153"},{"key":"e_1_3_2_1_56_1","volume-title":"ImageNet Large Scale Visual Recognition Challenge. In IJCV '15","author":"Russakovsky Olga","year":"2015","unstructured":"Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein, Alexander C. Berg, and Li Fei-Fei. 2015. ImageNet Large Scale Visual Recognition Challenge. In IJCV '15."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001200"},{"key":"e_1_3_2_1_58_1","volume-title":"Fine-grained GPU Sharing for ML Applications. In EuroSys '24","author":"Strati Foteini","year":"2024","unstructured":"Foteini Strati, Xianzhe Ma, and Ana Klimovic. 2024. Orion: Interference-aware, Fine-grained GPU Sharing for ML Applications. In EuroSys '24."},{"key":"e_1_3_2_1_59_1","volume-title":"MISE: Providing Performance Predictability and Improving Fairness in Shared Main Memory Systems. In HPCA '13","author":"Subramanian Lavanya","year":"2013","unstructured":"Lavanya Subramanian, Vivek Seshadri, Yoongu Kim, Ben Jaiyen, and Onur Mutlu. 2013. MISE: Providing Performance Predictability and Improving Fairness in Shared Main Memory Systems. In HPCA '13."},{"key":"e_1_3_2_1_60_1","volume-title":"CuCatch: A Debugging Tool for Efficiently Catching Memory Safety Violations in CUDA Applications. In PLDI '23","author":"Ibn Ziad Mohamed Tarek","year":"2023","unstructured":"Mohamed Tarek Ibn Ziad, Sana Damani, Aamer Jaleel, Stephen W. Keckler, and Mark Stephenson. 2023. CuCatch: A Debugging Tool for Efficiently Catching Memory Safety Violations in CUDA Applications. In PLDI '23."},{"key":"e_1_3_2_1_61_1","unstructured":"Lukas Tobler. 2022. GPUless-Serverless GPU Functions. In Master Thesis."},{"key":"e_1_3_2_1_62_1","volume-title":"The Best of Many Worlds: Scheduling Machine Learning Inference on CPU-GPU Integrated Architectures. In 2022 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","author":"Vasiliadis Giorgos","unstructured":"Giorgos Vasiliadis, Rafail Tsirbas, and Sotiris Ioannidis. 2022. The Best of Many Worlds: Scheduling Machine Learning Inference on CPU-GPU Integrated Architectures. In 2022 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW). IEEE, 55--64."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"crossref","unstructured":"Nandita Vijaykumar Gennady Pekhimenko Adwait Jog Saugata Ghose Abhishek Bhowmick Rachata Ausavarangnirun Chita Das Mahmut Kandemir Todd C Mowry and Onur Mutlu. 2016. A Framework for Accelerating Bottlenecks in GPU Execution with Assist Warps. In ArXiv.","DOI":"10.1016\/B978-0-12-803738-6.00015-X"},{"key":"e_1_3_2_1_64_1","volume-title":"Keckler","author":"Villa Oreste","year":"2019","unstructured":"Oreste Villa, Mark Stephenson, David Nellans, and Stephen W. Keckler. 2019. NVBit: A Dynamic Binary Instrumentation Framework for NVIDIA GPUs. In MICRO '19."},{"key":"e_1_3_2_1_65_1","volume-title":"Graviton: Trusted Execution Environments on GPUs. In OSDI '18","author":"Volos Stavros","year":"2018","unstructured":"Stavros Volos, Kapil Vaswani, and Rodrigo Bruno. 2018. Graviton: Trusted Execution Environments on GPUs. In OSDI '18."},{"key":"e_1_3_2_1_66_1","volume-title":"Wavelet: Efficient DNN Training with Tick-Tock Scheduling. In MLSys '21","author":"Wang Guanhua","year":"2021","unstructured":"Guanhua Wang, Kehan Wang, Kenan Jiang, XIANGJUN LI, and Ion Stoica. 2021. Wavelet: Efficient DNN Training with Tick-Tock Scheduling. In MLSys '21."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446078"},{"key":"e_1_3_2_1_68_1","unstructured":"Florian Wende Thomas Steinke and Frank Cordes. 2014. Multi-threaded kernel offloading to gpgpu using hyper-q on kepler architecture. In ArXiv."},{"key":"e_1_3_2_1_69_1","volume-title":"NSDI '22","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. MLaaS in the wild: Workload analysis and scheduling in Large-Scale heterogeneous GPU clusters. In NSDI '22."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392742"},{"key":"e_1_3_2_1_71_1","volume-title":"Transparent GPU Sharing in Container Clouds for Deep Learning Workloads. In NSDI '23","author":"Wu Bingyang","year":"2023","unstructured":"Bingyang Wu, Zili Zhang, Zhihao Bai, Xuanzhe Liu, and Xin Jin. 2023. Transparent GPU Sharing in Container Clouds for Deep Learning Workloads. In NSDI '23."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS47774.2020.00125"},{"key":"e_1_3_2_1_73_1","volume-title":"GPUCC - An Open-Source GPGPU Compiler. In CGO '16","author":"Wu Jingyue","year":"2016","unstructured":"Jingyue Wu, Artem Belevich, Eli Bendersky, Mark Heffernan, Chris Leary, Jacques Pienaar, Bjarke Roune, Rob Springer, Xuetian Weng, and Robert Hundt. 2016. GPUCC - An Open-Source GPGPU Compiler. In CGO '16."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC\/SmartCity\/DSS.2019.00035"},{"key":"e_1_3_2_1_75_1","volume-title":"Gandiva: Introspective Cluster Scheduling for Deep Learning. In OSDI '18","author":"Xiao Wencong","unstructured":"Wencong Xiao, Romil Bhardwaj, Ramachandran Ramjee, Muthian Sivathanu, Nipun Kwatra, Zhenhua Han, Pratyush Patel, Xuan Peng, Hanyu Zhao, Quanlu Zhang, F. Yang, and L. Zhou. 2018. Gandiva: Introspective Cluster Scheduling for Deep Learning. In OSDI '18."},{"key":"e_1_3_2_1_76_1","volume-title":"AntMan: Dynamic Scaling on GPU Clusters for Deep Learning. In OSDI '20","author":"Xiao Wencong","year":"2020","unstructured":"Wencong Xiao, Shiru Ren, Yong Li, Yang Zhang, Pengyang Hou, Zhi Li, Yihui Feng, Wei Lin, and Yangqing Jia. 2020. AntMan: Dynamic Scaling on GPU Clusters for Deep Learning. In OSDI '20."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330389"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018754"},{"key":"e_1_3_2_1_79_1","unstructured":"Fuxun Yu Di Wang Longfei Shangguan Minjia Zhang Chenchen Liu and Xiang Chen. 2022. A survey of multi-tenant deep learning inference on GPU. In ArXiv."},{"key":"e_1_3_2_1_80_1","volume-title":"Amogh Akshintala, and Christopher J. Rossbach.","author":"Yu Hangchen","year":"2020","unstructured":"Hangchen Yu, Arthur Michener Peters, Amogh Akshintala, and Christopher J. Rossbach. 2020. AvA: Accelerated Virtualization of Accelerators. In ASPLOS '20."},{"key":"e_1_3_2_1_81_1","volume-title":"Salus: Fine-grained gpu sharing primitives for deep learning applications. In arXiv preprint arXiv:1902.04610.","author":"Yu Peifeng","year":"2019","unstructured":"Peifeng Yu and Mosharaf Chowdhury. 2019. Salus: Fine-grained gpu sharing primitives for deep learning applications. In arXiv preprint arXiv:1902.04610."},{"key":"e_1_3_2_1_82_1","volume-title":"G-Net: Effective GPU Sharing in NFV Systems. In NSDI'18","author":"Zhang Kai","year":"2018","unstructured":"Kai Zhang, Bingsheng He, Jiayu Hu, Zeke Wang, Bei Hua, Jiayi Meng, and Lishan Yang. 2018. G-Net: Effective GPU Sharing in NFV Systems. In NSDI'18."},{"key":"e_1_3_2_1_83_1","volume-title":"TunneLs for Bootlegging: Fully Reverse-Engineering GPU TLBs for Challenging Isolation Guarantees of NVIDIA MIG. In CCS '23","author":"Zhang Zhenkai","year":"2023","unstructured":"Zhenkai Zhang, Tyler Allen, Fan Yao, Xing Gao, and Rong Ge. 2023. TunneLs for Bootlegging: Fully Reverse-Engineering GPU TLBs for Challenging Isolation Guarantees of NVIDIA MIG. In CCS '23."},{"key":"e_1_3_2_1_84_1","volume-title":"Muxflow: Efficient and safe GPU sharing in large-scale production deep learning clusters. ArXiv","author":"Zhao Yihao","year":"2023","unstructured":"Yihao Zhao, Xin Liu, Shufan Liu, Xiang Li, Yibo Zhu, Gang Huang, Xuanzhe Liu, and Xin Jin. 2023. Muxflow: Efficient and safe GPU sharing in large-scale production deep learning clusters. ArXiv (2023)."}],"event":{"name":"Middleware '24: 25th International Middleware Conference","location":"Hong Kong Hong Kong","acronym":"Middleware '24","sponsor":["IFIP","Usenix"]},"container-title":["Proceedings of the 25th International Middleware Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652892.3700768","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652892.3700768","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:53:57Z","timestamp":1750287237000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652892.3700768"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"references-count":84,"alternative-id":["10.1145\/3652892.3700768","10.1145\/3652892"],"URL":"https:\/\/doi.org\/10.1145\/3652892.3700768","relation":{},"subject":[],"published":{"date-parts":[[2024,12,2]]},"assertion":[{"value":"2024-12-02","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}