{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,2]],"date-time":"2026-07-02T23:40:05Z","timestamp":1783035605356,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":68,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T00:00:00Z","timestamp":1645488000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China (NSFC)","award":["62072297 and 61832006"],"award-info":[{"award-number":["62072297 and 61832006"]}]},{"name":"National Key R&D Program of China","award":["2021ZD0110104"],"award-info":[{"award-number":["2021ZD0110104"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,2,28]]},"DOI":"10.1145\/3503222.3507752","type":"proceedings-article","created":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T20:49:01Z","timestamp":1645562941000},"page":"388-401","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":49,"title":["VELTAIR: towards high-performance multi-tenant deep learning services via adaptive compilation and scheduling"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0874-0682","authenticated-orcid":false,"given":"Zihan","family":"Liu","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, China \/ Shanghai Qi Zhi Institute, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5660-5493","authenticated-orcid":false,"given":"Jingwen","family":"Leng","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China \/ Shanghai Qi Zhi Institute, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7387-599X","authenticated-orcid":false,"given":"Zhihui","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China \/ Shanghai Qi Zhi Institute, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5832-0347","authenticated-orcid":false,"given":"Quan","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China \/ Shanghai Qi Zhi Institute, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6218-4659","authenticated-orcid":false,"given":"Chao","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China \/ Shanghai Qi Zhi Institute, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0034-2302","authenticated-orcid":false,"given":"Minyi","family":"Guo","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China \/ Shanghai Qi Zhi Institute, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,2,22]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AMD. 2020. Ryzen Threadripper 3990X Processor. https:\/\/www.amd.com\/en\/products\/cpu\/amd-ryzen-threadripper-3990x"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00081"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661197"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037700"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872368"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304005"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/3291168.3291211"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","unstructured":"Tianqi Chen Lianmin Zheng Eddie Q. Yan Ziheng Jiang Thierry Moreau Luis Ceze Carlos Guestrin and Arvind Krishnamurthy. 2018. Learning to Optimize Tensor Programs. In Advances in Neural Information Processing Systems 31. 3393\u20133404. https:\/\/doi.org\/10.5555\/3327144.3327258","DOI":"10.5555\/3327144.3327258"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.40"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00049"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00027"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD46524.2019.00075"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476143"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_16_1","volume-title":"MPI: A message - passing interface standard.","author":"Interface Forum Message Passing","year":"1994","unstructured":"Message Passing Interface Forum. 1994. MPI: A message - passing interface standard."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1080\/14786440109462720"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414650"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00031"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304013"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00062"},{"key":"e_1_3_2_1_22_1","volume-title":"XLA: Optimizing Compiler for TensorFlow. https:\/\/www.tensorflow.org\/xla","year":"2020","unstructured":"Google. 2020. XLA: Optimizing Compiler for TensorFlow. https:\/\/www.tensorflow.org\/xla"},{"key":"e_1_3_2_1_23_1","unstructured":"Google. 2021. TensorFlow graph optimization with Grappler. https:\/\/www.tensorflow.org\/guide\/graph_optimization"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/3488766.3488791"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00020"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218732"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_30_1","unstructured":"Intel. 2019. Math Kernel Library for Deep Neural Networks. https:\/\/github.com\/rsdubtso\/mkl-dnn"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581261"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00060"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.21"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPA-BDCloud-SocialCom-SustainCom51426.2020.00041"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/s42514-020-00044-7"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749475"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155650"},{"key":"e_1_3_2_1_41_1","unstructured":"Microsoft. 2021. Optimize and Accelerate Machine Learning Inferencing and Training. https:\/\/onnxruntime.ai\/"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/IWCMC.2018.8450304"},{"key":"e_1_3_2_1_43_1","unstructured":"Graham Neubig Chris Dyer Yoav Goldberg Austin Matthews Waleed Ammar Antonios Anastasopoulos Miguel Ballesteros David Chiang Daniel Clothiaux Trevor Cohn Kevin Duh Manaal Faruqui Cynthia Gan Dan Garrette Yangfeng Ji Lingpeng Kong Adhiguna Kuncoro Gaurav Kumar Chaitanya Malaviya Paul Michel Yusuke Oda Matthew Richardson Naomi Saphra Swabha Swayamdipta and Pengcheng Yin. 2017. DyNet: The Dynamic Neural Network Toolkit. arXiv preprint arXiv:1701.03980."},{"key":"e_1_3_2_1_44_1","unstructured":"NVIDIA. [n.d.]. Multi-Process Service."},{"key":"e_1_3_2_1_45_1","unstructured":"NVIDIA. 2021. NVIDIA A100 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data-center\/a100\/"},{"key":"e_1_3_2_1_46_1","unstructured":"NVIDIA. 2021. NVIDIA cuDNN. https:\/\/developer.nvidia.com\/cudnn"},{"key":"e_1_3_2_1_47_1","unstructured":"NVIDIA. 2021. NVIDIA MULTI-INSTANCE GPU. https:\/\/www.nvidia.com\/en-us\/technologies\/multi-instance-gpu\/"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.690"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_53_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning, ICML 2019","volume":"6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc V. Le. 2019. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. In Proceedings of the 36th International Conference on Machine Learning, ICML 2019, 9-15 June 2019, Long Beach, California, USA (Proceedings of Machine Learning Research, Vol. 97). PMLR, 6105\u20136114."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2259016.2259018"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522713"},{"key":"e_1_3_2_1_56_1","volume-title":"Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. CoRR, abs\/1802.04730","author":"Vasilache Nicolas","year":"2018","unstructured":"Nicolas Vasilache, Oleksandr Zinenko, Theodoros Theodoridis, Priya Goyal, Zachary DeVito, William S. Moses, Sven Verdoolaege, Andrew Adams, and Albert Cohen. 2018. Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. CoRR, abs\/1802.04730 (2018)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-013-0890-2"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00088"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485974"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2020.2988991"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00074"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","unstructured":"Xia Zhao Magnus Jahre and Lieven Eeckhout. 2020. HSM: A Hybrid Slowdown Model for Multitasking GPUs. In Architectural Support for Programming Languages and Operating Systems (ASPLOS). ACM 1371\u20131385. https:\/\/doi.org\/10.1145\/3373376.3378457 10.1145\/3373376.3378457","DOI":"10.1145\/3373376.3378457"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.5555\/3488766.3488815"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","unstructured":"Size Zheng Yun Liang Shuo Wang Renze Chen and Kaiwen Sheng. 2020. FlexTensor: An Automatic Schedule Exploration and Optimization Framework for Tensor Computation on Heterogeneous System. In Architectural Support for Programming Languages and Operating Systems Lausanne (ASPLOS). https:\/\/doi.org\/10.1145\/3373376.3378508 10.1145\/3373376.3378508","DOI":"10.1145\/3373376.3378508"},{"key":"e_1_3_2_1_67_1","unstructured":"Jie Zhou Ganqu Cui Shengding Hu Zhengyan Zhang Cheng Yang Zhiyuan Liu Lifeng Wang Changcheng Li and Maosong Sun. 2021. Graph Neural Networks: A Review of Methods and Applications. arxiv:1812.08434."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","unstructured":"Yangjie Zhou Mengtian Yang Cong Guo Jingwen Leng Yun Liang Quan Chen Minyi Guo and Yuhao Zhu. 2021. Characterizing and Demystifying the Implicit Convolution Algorithm on Commercial Matrix-Multiplication Accelerators. In 2021 IEEE International Symposium on Workload Characterization (IISWC). https:\/\/doi.org\/10.1109\/IISWC53511.2021.00029 10.1109\/IISWC53511.2021.00029","DOI":"10.1109\/IISWC53511.2021.00029"}],"event":{"name":"ASPLOS '22: 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Lausanne Switzerland","acronym":"ASPLOS '22","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503222.3507752","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503222.3507752","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:11:40Z","timestamp":1750191100000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503222.3507752"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,22]]},"references-count":68,"alternative-id":["10.1145\/3503222.3507752","10.1145\/3503222"],"URL":"https:\/\/doi.org\/10.1145\/3503222.3507752","relation":{},"subject":[],"published":{"date-parts":[[2022,2,22]]},"assertion":[{"value":"2022-02-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}