{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:13:03Z","timestamp":1750219983722,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,20]],"date-time":"2024-02-20T00:00:00Z","timestamp":1708387200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NKRDP","award":["2021YFB0300800"],"award-info":[{"award-number":["2021YFB0300800"]}]},{"name":"NSFC","award":["62102396"],"award-info":[{"award-number":["62102396"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100005090","name":"Beijing Nova Program","doi-asserted-by":"publisher","award":["Z211100002121143","20220484217"],"award-info":[{"award-number":["Z211100002121143","20220484217"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100005090","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Youth Innovation Promotion Association of Chinese Academy of Sciences","award":["2021099"],"award-info":[{"award-number":["2021099"]}]},{"name":"CCF-Ant Research Fund CCF-AFSGRF","award":["20230207"],"award-info":[{"award-number":["20230207"]}]},{"name":"Pilotfor Major Scientific Research Facility of Jiangsu Province of China","award":["NO.BM2021800"],"award-info":[{"award-number":["NO.BM2021800"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,2]]},"DOI":"10.1145\/3627535.3638485","type":"proceedings-article","created":{"date-parts":[[2024,2,20]],"date-time":"2024-02-20T14:22:41Z","timestamp":1708438961000},"page":"451-453","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["POSTER: FineCo: Fine-grained Heterogeneous Resource Management for Concurrent DNN Inferences"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-1081-0356","authenticated-orcid":false,"given":"Lixian","family":"Ma","sequence":"first","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China"},{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0634-6842","authenticated-orcid":false,"given":"Haoruo","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China"},{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9678-7228","authenticated-orcid":false,"given":"En","family":"Shao","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China"},{"name":"University of Chinese Academy of Sciences, Beijing, China"},{"name":"Nanjing Institute of InforSuperBahn, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4940-5598","authenticated-orcid":false,"given":"Leping","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5832-0347","authenticated-orcid":false,"given":"Quan","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6361-5948","authenticated-orcid":false,"given":"Guangming","family":"Tan","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China"},{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,2,20]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2012.6168946"},{"key":"e_1_3_2_1_2_1","unstructured":"AMD. Amd's approach to gpu virtualization. https:\/\/drivers.amd.com\/relnotes\/amd_mxgpu_deploymentguide_vmware.pdf."},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. Stream management hip api. https:\/\/docs.amd.com\/bundle\/HIP-API-Guide-v5.4.1\/page\/a00183.html."},{"key":"e_1_3_2_1_4_1","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et al. {TVM}: An automated {End-to-End} optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pages 578--594, 2018."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071121"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476143"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421284"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3576933"},{"key":"e_1_3_2_1_9_1","first-page":"539","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Han Mingcong","year":"2022","unstructured":"Mingcong Han, Hanze Zhang, Rong Chen, and Haibo Chen. Microsecond-scale preemption for concurrent gpu-accelerated dnn inferences. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22), pages 539--558, 2022."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_11_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861","author":"Howard Andrew G","year":"2017","unstructured":"Andrew G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861, 2017."},{"key":"e_1_3_2_1_12_1","unstructured":"INTEL. Scalable i\/o virtualization. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/technical\/introducing-intel-scalable-io-virtualization.html."},{"key":"e_1_3_2_1_13_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems, 25","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems, 25, 2012."},{"key":"e_1_3_2_1_14_1","unstructured":"NVIDIA. multi-process service. https:\/\/docs.NVIDIA.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf."},{"key":"e_1_3_2_1_15_1","unstructured":"NVIDIA. Nvidia multi-instance gpu user guide - nvidia developer. https:\/\/docs.NVIDIA.com\/datacenter\/tesla\/pdf\/NVIDIA_MIG_User_Guide.pdf."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037707"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_18_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446078"},{"key":"e_1_3_2_1_20_1","first-page":"204","article-title":"Bridging the gap between auto-tuners and hardware-native performance","volume":"4","author":"Xing Jiarong","year":"2022","unstructured":"Jiarong Xing, Leyuan Wang, Shang Zhang, Jack Chen, Ang Chen, and Yibo Zhu. Bolt: Bridging the gap between auto-tuners and hardware-native performance. Proceedings of Machine Learning and Systems, 4:204--216, 2022.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_21_1","first-page":"863","volume-title":"Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, et al. Ansor: Generating high-performance tensor programs for deep learning. In Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation, pages 863--879, 2020."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378508"},{"key":"e_1_3_2_1_23_1","first-page":"233","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Zhu Hongyu","year":"2022","unstructured":"Hongyu Zhu, Ruofan Wu, Yijia Diao, Shanbin Ke, Haoyu Li, Chen Zhang, Jilong Xue, Lingxiao Ma, Yuqing Xia, Wei Cui, et al. {ROLLER}: Fast and efficient tensor compilation for deep learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22), pages 233--248, 2022."}],"event":{"name":"PPoPP '24: 29th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGPLAN ACM Special Interest Group on Programming Languages"],"location":"Edinburgh United Kingdom","acronym":"PPoPP '24"},"container-title":["Proceedings of the 29th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627535.3638485","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627535.3638485","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:27Z","timestamp":1750182567000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627535.3638485"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,20]]},"references-count":23,"alternative-id":["10.1145\/3627535.3638485","10.1145\/3627535"],"URL":"https:\/\/doi.org\/10.1145\/3627535.3638485","relation":{},"subject":[],"published":{"date-parts":[[2024,2,20]]},"assertion":[{"value":"2024-02-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}