{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T21:27:57Z","timestamp":1769981277173,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3676536.3676718","type":"proceedings-article","created":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T12:53:56Z","timestamp":1744203236000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["GACER: Granularity-Aware ConcurrEncy Regulation for Multi-Tenant Deep Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-7314-001X","authenticated-orcid":false,"given":"Yongbo","family":"Yu","sequence":"first","affiliation":[{"name":"George Mason University, Fairfax, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4880-6658","authenticated-orcid":false,"given":"Fuxun","family":"Yu","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2738-6826","authenticated-orcid":false,"given":"Zhi","family":"Tian","sequence":"additional","affiliation":[{"name":"George Mason University, Fairfax, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2790-976X","authenticated-orcid":false,"given":"Xiang","family":"Chen","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2008.917757"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"e_1_3_2_1_3_1","volume-title":"Global data center accelerator market size, status and forecast 2020--2025","author":"Reports Market","year":"2021","unstructured":"Market Reports. Global data center accelerator market size, status and forecast 2020--2025, 2021. https:\/\/www.mynewsdesk.com\/brandessence\/pressreleases\/data-center-accelerator-market-size-2021-cagr-38-dot-7-percent-3112488."},{"key":"e_1_3_2_1_4_1","first-page":"27","article-title":"Optimizing dnn computation with relaxed graph substitutions","volume":"1","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, James Thomas, Todd Warszawski, Mingyu Gao, Matei Zaharia, and Alex Aiken. Optimizing dnn computation with relaxed graph substitutions. Proceedings of Machine Learning and Systems, 1:27--39, 2019.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_5_1","first-page":"167","article-title":"Inter-operator scheduler for cnn acceleration","volume":"3","author":"Ding Yaoyao","year":"2021","unstructured":"Yaoyao Ding, Ligeng Zhu, Zhihao Jia, Gennady Pekhimenko, and Song Han. Ios: Inter-operator scheduler for cnn acceleration. Proceedings of Machine Learning and Systems, 3:167--180, 2021.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00290"},{"key":"e_1_3_2_1_7_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861","author":"Howard Andrew G","year":"2017","unstructured":"Andrew G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861, 2017."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2019.2915983"},{"key":"e_1_3_2_1_9_1","volume-title":"Recommender systems for large-scale social networks: A review of challenges and solutions","author":"Eirinaki Magdalini","year":"2018","unstructured":"Magdalini Eirinaki, Jerry Gao, Iraklis Varlamis, and Konstantinos Tserpes. Recommender systems for large-scale social networks: A review of challenges and solutions, 2018."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.3390\/encyclopedia2010031"},{"key":"e_1_3_2_1_11_1","unstructured":"NVIDIA. Multi-Process Service 2021. URL https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf."},{"key":"e_1_3_2_1_12_1","unstructured":"NVIDIA. Multi-Stream 2020. URL https:\/\/on-demand.gputechconf.com\/gtc\/2014\/presentations\/S4158-cuda-streams-best-practices-common-pitfalls.pdf."},{"key":"e_1_3_2_1_13_1","volume-title":"Nvidia multi instance gpu (mig)","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. Nvidia multi instance gpu (mig). 2020."},{"key":"e_1_3_2_1_14_1","volume-title":"Ang Li, Shawn Bray, Chenchen Liu, and Xiang Chen. Powering multi-task federated learning with competitive gpu resource sharing.","author":"Yu Yongbo","year":"2022","unstructured":"Yongbo Yu, Fuxun Yu, Zirui Xu, Mingjia Zhang Di Wang, Ang Li, Shawn Bray, Chenchen Liu, and Xiang Chen. Powering multi-task federated learning with competitive gpu resource sharing. 2022."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00049"},{"key":"e_1_3_2_1_16_1","volume-title":"Hermann Ney, and Richard Bowden. Weakly supervised learning with multi-stream cnn-lstm-hmms to discover sequential parallelism in sign language videos","author":"Koller Oscar","year":"2019","unstructured":"Oscar Koller, Necati Cihan Camgoz, Hermann Ney, and Richard Bowden. Weakly supervised learning with multi-stream cnn-lstm-hmms to discover sequential parallelism in sign language videos. IEEE transactions on pattern analysis and machine intelligence, 42(9):2306--2320, 2019."},{"key":"e_1_3_2_1_17_1","first-page":"8343","article-title":"Lightweight and parallel gpu task scheduling for deep learning","volume":"33","author":"Kwon Woosuk","year":"2020","unstructured":"Woosuk Kwon, Gyeong-In Yu, Eunji Jeong, and Byung-Gon Chun. Nimble: Lightweight and parallel gpu task scheduling for deep learning. Advances in Neural Information Processing Systems, 33:8343--8354, 2020.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643501"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.29"},{"key":"e_1_3_2_1_21_1","first-page":"98","article-title":"Fine-grained gpu sharing primitives for deep learning applications","volume":"2","author":"Yu Peifeng","year":"2020","unstructured":"Peifeng Yu and Mosharaf Chowdhury. Fine-grained gpu sharing primitives for deep learning applications. Proceedings of Machine Learning and Systems, 2: 98--111, 2020.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071121"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583120.3586953"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00040"},{"key":"e_1_3_2_1_25_1","volume-title":"et al. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in neural information processing systems, 32","author":"Huang Yanping","year":"2019","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Dehao Chen, Mia Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V Le, Yonghui Wu, et al. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in neural information processing systems, 32, 2019."},{"key":"e_1_3_2_1_26_1","volume-title":"Multi-model machine learning inference serving with gpu spatial partitioning. arXiv preprint arXiv:2109.01611","author":"Choi Seungbeom","year":"2021","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. Multi-model machine learning inference serving with gpu spatial partitioning. arXiv preprint arXiv:2109.01611, 2021."},{"key":"e_1_3_2_1_27_1","volume-title":"Opara: Exploiting operator parallelism for expediting dnn inference on gpus. arXiv preprint arXiv:2312.10351","author":"Chen Aodong","year":"2023","unstructured":"Aodong Chen, Fei Xu, Li Han, Yuan Dong, Li Chen, Zhi Zhou, and Fangming Liu. Opara: Exploiting operator parallelism for expediting dnn inference on gpus. arXiv preprint arXiv:2312.10351, 2023."},{"key":"e_1_3_2_1_28_1","volume-title":"NVIDIA Nsight Systems","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. NVIDIA Nsight Systems, 2020. URL https:\/\/on-demand.gputechconf.com\/gtc\/2014\/presentations\/S4158-cuda-streams-best-practices-common-pitfalls.pdf."},{"key":"e_1_3_2_1_29_1","volume-title":"NVIDIA Nsight Computing","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. NVIDIA Nsight Computing, 2020. URL https:\/\/developer.nvidia.com\/nsight-compute."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00065"},{"key":"e_1_3_2_1_31_1","volume-title":"Bert: Pretraining of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pretraining of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01199"},{"key":"e_1_3_2_1_33_1","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G Azzolini et al. Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:1906.00091 2019."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3326937.3341261"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"},{"key":"e_1_3_2_1_36_1","volume-title":"Nvidia CuDNN Documentation","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. Nvidia CuDNN Documentation,, 2020. URL https:\/\/docs.nvidia.com\/deeplearning\/cudnn\/developer-guide\/index.html."},{"key":"e_1_3_2_1_37_1","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et al. {TVM}: An automated {End-to-End} optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pages 578--594, 2018."}],"event":{"name":"ICCAD '24: 43rd IEEE\/ACM International Conference on Computer-Aided Design","location":"Newark Liberty International Airport Marriott New York NY USA","acronym":"ICCAD '24","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CAS","IEEE CEDA","IEEE EDS"]},"container-title":["Proceedings of the 43rd IEEE\/ACM International Conference on Computer-Aided Design"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676536.3676718","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676536.3676718","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:43:57Z","timestamp":1750290237000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676536.3676718"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":37,"alternative-id":["10.1145\/3676536.3676718","10.1145\/3676536"],"URL":"https:\/\/doi.org\/10.1145\/3676536.3676718","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2025-04-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}