{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T01:12:40Z","timestamp":1780708360875,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T00:00:00Z","timestamp":1740700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF (National Science Foundation)","award":["CNS-2339901,CNS-2312785"],"award-info":[{"award-number":["CNS-2339901,CNS-2312785"]}]},{"name":"Hong Kong Ph.D. Fellowship Scheme","award":["PF20-46117"],"award-info":[{"award-number":["PF20-46117"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,28]]},"DOI":"10.1145\/3710848.3710863","type":"proceedings-article","created":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T06:20:57Z","timestamp":1740723657000},"page":"267-281","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["SGDRC: Software-Defined Dynamic Resource Control for Concurrent DNN Inference on NVIDIA GPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4745-3176","authenticated-orcid":false,"given":"Yongkang","family":"Zhang","sequence":"first","affiliation":[{"name":"HKUST, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9677-7960","authenticated-orcid":false,"given":"Haoxuan","family":"Yu","sequence":"additional","affiliation":[{"name":"HKUST, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3301-3811","authenticated-orcid":false,"given":"Chenxia","family":"Han","sequence":"additional","affiliation":[{"name":"CUHK, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4232-4746","authenticated-orcid":false,"given":"Cheng","family":"Wang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0230-1048","authenticated-orcid":false,"given":"Baotong","family":"Lu","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5073-5561","authenticated-orcid":false,"given":"Yunzhe","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7024-441X","authenticated-orcid":false,"given":"Zhifeng","family":"Jiang","sequence":"additional","affiliation":[{"name":"HKUST, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9337-1382","authenticated-orcid":false,"given":"Yang","family":"Li","sequence":"additional","affiliation":[{"name":"China University of Geosciences, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9745-4372","authenticated-orcid":false,"given":"Xiaowen","family":"Chu","sequence":"additional","affiliation":[{"name":"HKUST (Guangzhou), Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3155-0203","authenticated-orcid":false,"given":"Huaicheng","family":"Li","sequence":"additional","affiliation":[{"name":"Virginia Tech, Blacksburg, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,2,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS.2017.00017"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Gorka Irazoqui Apecechea Thomas Eisenbarth and Berk Sunar. 2015. Systematic Reverse Engineering of Cache Slice Selection in Intel Processors. IACR Cryptol. ePrint Arch. (2015) 690. http:\/\/eprint.iacr.org\/2015\/690","DOI":"10.1109\/DSD.2015.56"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS58335.2023.00012"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS.2018.00021"},{"key":"e_1_3_2_1_5_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2018","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Q. Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2018, Carlsbad, CA, USA, October 8-10, 2018, Andrea C. Arpaci-Dusseau and Geoff Voelker (Eds.). USENIX Association, 578--594. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/chen"},{"key":"e_1_3_2_1_6_1","volume-title":"Multi-model Machine Learning Inference Serving with GPU Spatial Partitioning. CoRR abs\/2109.01611","author":"Choi Seungbeom","year":"2021","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2021. Multi-model Machine Learning Inference Serving with GPU Spatial Partitioning. CoRR abs\/2109.01611 (2021). arXiv:2109.01611 https:\/\/arxiv.org\/abs\/2109.01611"},{"key":"e_1_3_2_1_7_1","volume-title":"Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In 2022 USENIX Annual Technical Conference, USENIX ATC 2022","author":"Choi Seungbeom","year":"2022","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2022. Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing. In 2022 USENIX Annual Technical Conference, USENIX ATC 2022, Carlsbad, CA, USA, July 11-13, 2022, Jiri Schindler and Noa Zilberman (Eds.). USENIX Association, 199--216. https:\/\/www.usenix.org\/conference\/atc22\/presentation\/choi-seungbeom"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071121"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476143"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/N19-1423"},{"key":"e_1_3_2_1_11_1","unstructured":"envytools. 2024. Envytools. https:\/\/github.com\/envytools\/envytools."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2403.03206"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303977"},{"key":"e_1_3_2_1_14_1","volume-title":"Fine-grained GPU Sharing for ML Applications. In EuroSys '24: Nineteenth EuroSys Conference 2024","author":"Foteini Strati","year":"2024","unstructured":"Strati Foteini, Ma Xianzhe, and Klimovic Ana. 2024. Orion: Interference-aware, Fine-grained GPU Sharing for ML Applications. In EuroSys '24: Nineteenth EuroSys Conference 2024, Athens, Greece, April 22-25, 2024. Association for Computing Machinery."},{"key":"e_1_3_2_1_15_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2020","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like Clockwork: Performance Predictability from the Bottom Up. In 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2020, Virtual Event, November 4-6, 2020. USENIX Association, 443--462. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/gujarati"},{"key":"e_1_3_2_1_16_1","volume-title":"Microsecond-scale Preemption for Concurrent GPU-accelerated DNN Inferences. In 16th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2022","author":"Han Mingcong","year":"2022","unstructured":"Mingcong Han, Hanze Zhang, Rong Chen, and Haibo Chen. 2022. Microsecond-scale Preemption for Concurrent GPU-accelerated DNN Inferences. In 16th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2022, Carlsbad, CA, USA, July 11-13, 2022, Marcos K. Aguilera and Hakim Weatherspoon (Eds.). USENIX Association, 539--558. https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/han"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01712"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3625549.3658657"},{"key":"e_1_3_2_1_21_1","volume-title":"DISB: DNN Inference Serving Benchmark. https:\/\/github.com\/SJTU-IPADS\/disb\/tree\/main.","author":"SJTU","year":"2024","unstructured":"SJTU IPADS. 2024. DISB: DNN Inference Serving Benchmark. https:\/\/github.com\/SJTU-IPADS\/disb\/tree\/main."},{"key":"e_1_3_2_1_22_1","unstructured":"J. D. Hall J. F. Duluk Jr T.J. Purcell and P. A. Cuadra. 2018. Error checking in out-of-order task scheduling. https:\/\/patents.google.com\/patent\/US20130152094. https:\/\/patents.google.com\/patent\/US20130152094 US Patent 9 965 321."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS.2019.00011"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW55747.2022.00124"},{"key":"e_1_3_2_1_26_1","volume-title":"AlpaServe: Statistical Multiplexing with Model Parallelism for Deep Learning Serving. In 17th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2023","author":"Li Zhuohan","year":"2023","unstructured":"Zhuohan Li, Lianmin Zheng, Yinmin Zhong, Vincent Liu, Ying Sheng, Xin Jin, Yanping Huang, Zhifeng Chen, Hao Zhang, Joseph E. Gonzalez, and Ion Stoica. 2023. AlpaServe: Statistical Multiplexing with Model Parallelism for Deep Learning Serving. In 17th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2023, Boston, MA, USA, July 10-12, 2023, Roxana Geambasu and Ed Nightingale (Eds.). USENIX Association, 663--679. https:\/\/www.usenix.org\/conference\/osdi23\/presentation\/li-zhouhan"},{"key":"e_1_3_2_1_27_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2020","author":"Ma Lingxiao","year":"2020","unstructured":"Lingxiao Ma, Zhiqiang Xie, Zhi Yang, Jilong Xue, Youshan Miao, Wei Cui, Wenxiang Hu, Fan Yang, Lintao Zhang, and Lidong Zhou. 2020. Rammer: Enabling Holistic Deep Learning Compiler Optimizations with rTasks. In 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2020, Virtual Event, November 4-6, 2020. USENIX Association, 881--897. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/ma"},{"key":"e_1_3_2_1_28_1","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2023","author":"Mai Haohui","year":"2023","unstructured":"Haohui Mai, Jiacheng Zhao, Hongren Zheng, Yiyang Zhao, Zibin Liu, Mingyu Gao, Cong Wang, Huimin Cui, Xiaobing Feng, and Christos Kozyrakis. 2023. Honeycomb: Secure and Efficient GPU Executions via Static Validation. In 17th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2023, Boston, MA, USA, July 10-12, 2023, Roxana Geambasu and Ed Nightingale (Eds.). USENIX Association, 155--172. https:\/\/www.usenix.org\/conference\/osdi23\/presentation\/mai"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-26362-5_3"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2016.2549523"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613163"},{"key":"e_1_3_2_1_32_1","unstructured":"NVIDIA. 2024. Description of changes made to the framebuffer partition addressing (FBPA) in Pascal and later NVIDIA architectures. https:\/\/http.download.nvidia.com\/open-gpu-doc\/pascal\/1\/gp100-fbpa.txt."},{"key":"e_1_3_2_1_33_1","unstructured":"NVIDIA. 2024. NVIDIA A100 Tensor Core GPU Architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf."},{"key":"e_1_3_2_1_34_1","unstructured":"NVIDIA. 2024. NVIDIA H100 Tensor Core GPU Architecture. https:\/\/resources.nvidia.com\/en-us-tensor-core\/gtc22-whitepaper-hopper."},{"key":"e_1_3_2_1_35_1","unstructured":"NVIDIA. 2024. NVIDIA Multi-Instance GPU. https:\/\/www.nvidia.com\/en-us\/technologies\/multi-instance-gpu\/."},{"key":"e_1_3_2_1_36_1","unstructured":"NVIDIA. 2024. NVIDIA Multi-Process Service. https:\/\/docs.nvidia.com\/deploy\/mps\/index.html."},{"key":"e_1_3_2_1_37_1","unstructured":"NVIDIA. 2024. Pascal MMU Format Changes. https:\/\/http.download.nvidia.com\/open-gpu-doc\/pascal\/1\/gp100-mmu-format.pdf."},{"key":"e_1_3_2_1_38_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. CoRR abs\/2303.08774 (2023). https:\/\/doi.org\/10.48550\/ARXIV.2303.08774 arXiv:2303.08774"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.4230\/LIPIcs.ECRTS.2020.10"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2401.09290"},{"key":"e_1_3_2_1_41_1","volume-title":"INFaaS: Automated Model-less Inference Serving. In 2021 USENIX Annual Technical Conference, USENIX ATC 2021","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J. Yadwadkar, and Christos Kozyrakis. 2021. INFaaS: Automated Model-less Inference Serving. In 2021 USENIX Annual Technical Conference, USENIX ATC 2021, July 14-16, 2021, Irina Calciu and Geoff Kuenning (Eds.). USENIX Association, 397--411. https:\/\/www.usenix.org\/conference\/atc21\/presentation\/romero"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018113"},{"key":"e_1_3_2_1_43_1","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2023","author":"Shi Yining","year":"2023","unstructured":"Yining Shi, Zhi Yang, Jilong Xue, Lingxiao Ma, Yuqing Xia, Ziming Miao, Yuxiao Guo, Fan Yang, and Lidong Zhou. 2023. Welder: Scheduling Deep Learning Memory Access via Tile-graph. In 17th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2023, Boston, MA, USA, July 10-12, 2023, Roxana Geambasu and Ed Nightingale (Eds.). USENIX Association, 701--718. https:\/\/www.usenix.org\/conference\/osdi23\/presentation\/shi"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_45_1","volume-title":"Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, USA, Isabelle Guyon, Ulrike von Luxburg, Samy Bengio, Hanna M. Wallach, Rob Fergus, S. V. N. Vishwanathan, and Roman Garnett (Eds.). 5998--6008. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html"},{"key":"e_1_3_2_1_46_1","unstructured":"Colin Wei Yining Chen and Tengyu Ma. 2022. Statistically Meaningful Approximation: a Case Study on Approximating Turing Machines with Transformers. In NeurIPS. http:\/\/papers.nips.cc\/paper_files\/paper\/2022\/hash\/4ebf1d74f53ece08512a23309d58df89-Abstract-Conference.html"},{"key":"e_1_3_2_1_47_1","volume-title":"MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2022","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2022, Renton, WA, USA, April 4-6, 2022, Amar Phanishayee and Vyas Sekar (Eds.). USENIX Association, 945--960. https:\/\/www.usenix.org\/conference\/nsdi22\/presentation\/weng"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751213"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037742"},{"key":"e_1_3_2_1_50_1","volume-title":"Transparent GPU Sharing in Container Clouds for Deep Learning Workloads. In 20th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2023","author":"Wu Bingyang","year":"2023","unstructured":"Bingyang Wu, Zili Zhang, Zhihao Bai, Xuanzhe Liu, and Xin Jin. 2023. Transparent GPU Sharing in Container Clouds for Deep Learning Workloads. In 20th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2023, Boston, MA, April 17-19, 2023 (2023), Mahesh Balakrishnan and Manya Ghobadi (Eds.). USENIX Association, 69--85. https:\/\/www.usenix.org\/conference\/nsdi23\/presentation\/wu"},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the 2024 USENIX Annual Technical Conference, USENIX ATC 2024","author":"Wu Hao","year":"2024","unstructured":"Hao Wu, Yue Yu, Junxiao Deng, Shadi Ibrahim, Song Wu, Hao Fan, Ziyue Cheng, and Hai Jin. 2024. StreamBox: A Lightweight GPU SandBox for Serverless Inference Workflow. In Proceedings of the 2024 USENIX Annual Technical Conference, USENIX ATC 2024, Santa Clara, CA, USA, July 10-12, 2024, Saurabh Bagchi and Yiying Zhang (Eds.). USENIX Association, 59--73. https:\/\/www.usenix.org\/conference\/atc24\/presentation\/wu-hao"},{"key":"e_1_3_2_1_52_1","volume-title":"AntMan: Dynamic Scaling on GPU Clusters for Deep Learning. In 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2020","author":"Xiao Wencong","year":"2020","unstructured":"Wencong Xiao, Shiru Ren, Yong Li, Yang Zhang, Pengyang Hou, Zhi Li, Yihui Feng, Wei Lin, and Yangqing Jia. 2020. AntMan: Dynamic Scaling on GPU Clusters for Deep Learning. In 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2020, Virtual Event, November 4-6, 2020. USENIX Association, 533--548. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/xiao"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS55097.2022.00040"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628104"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2203.09040"},{"key":"e_1_3_2_1_56_1","volume-title":"SHEPHERD: Serving DNNs in the Wild. In 20th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2023","author":"Zhang Hong","year":"2023","unstructured":"Hong Zhang, Yupeng Tang, Anurag Khandelwal, and Ion Stoica. 2023. SHEPHERD: Serving DNNs in the Wild. In 20th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2023, Boston, MA, April 17-19, 2023, Mahesh Balakrishnan and Manya Ghobadi (Eds.). USENIX Association, 787--808. https:\/\/www.usenix.org\/conference\/nsdi23\/presentation\/zhang-hong"},{"key":"e_1_3_2_1_57_1","volume-title":"PilotFish: Harvesting Free Cycles of Cloud Gaming with Deep Learning Training. In 2022 USENIX Annual Technical Conference, USENIX ATC 2022","author":"Zhang Wei","year":"2022","unstructured":"Wei Zhang, Binghao Chen, Zhenhua Han, Quan Chen, Peng Cheng, Fan Yang, Ran Shu, Yuqing Yang, and Minyi Guo. 2022. PilotFish: Harvesting Free Cycles of Cloud Gaming with Deep Learning Training. In 2022 USENIX Annual Technical Conference, USENIX ATC 2022, Carlsbad, CA, USA, July 11-13, 2022, Jiri Schindler and Noa Zilberman (Eds.). USENIX Association, 217--232. https:\/\/www.usenix.org\/conference\/atc22\/presentation\/zhang-wei"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00073"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563465"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576915.3616672"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575745"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2307.04339"},{"key":"e_1_3_2_1_63_1","volume-title":"Ansor: Generating High-Performance Tensor Programs for Deep Learning. In 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2020","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, Joseph E. Gonzalez, and Ion Stoica. 2020. Ansor: Generating High-Performance Tensor Programs for Deep Learning. In 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2020, Virtual Event, November 4-6, 2020. USENIX Association, 863--879. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/zheng"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS.2018.00028"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS.2015.7108420"},{"key":"e_1_3_2_1_66_1","volume-title":"ROLLER: Fast and Efficient Tensor Compilation for Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2022","author":"Zhu Hongyu","year":"2022","unstructured":"Hongyu Zhu, Ruofan Wu, Yijia Diao, Shanbin Ke, Haoyu Li, Chen Zhang, Jilong Xue, Lingxiao Ma, Yuqing Xia, Wei Cui, Fan Yang, Mao Yang, Lidong Zhou, Asaf Cidon, and Gennady Pekhimenko. 2022. ROLLER: Fast and Efficient Tensor Compilation for Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2022, Carlsbad, CA, USA, July 11-13, 2022, Marcos K. Aguilera and Hakim Weatherspoon (Eds.). USENIX Association, 233--248. https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/zhu"}],"event":{"name":"PPoPP '25: The 30th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","location":"Las Vegas NV USA","acronym":"PPoPP '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 30th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3710848.3710863","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3710848.3710863","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:14:56Z","timestamp":1755875696000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3710848.3710863"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,28]]},"references-count":66,"alternative-id":["10.1145\/3710848.3710863","10.1145\/3710848"],"URL":"https:\/\/doi.org\/10.1145\/3710848.3710863","relation":{},"subject":[],"published":{"date-parts":[[2025,2,28]]},"assertion":[{"value":"2025-02-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}