{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T21:49:23Z","timestamp":1765057763224,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":84,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T00:00:00Z","timestamp":1645488000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2019YFF0302600"],"award-info":[{"award-number":["2019YFF0302600"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Natural Science Foundation of China","award":["62022057, 61832006, 61632017, 61872240, 62072297"],"award-info":[{"award-number":["62022057, 61832006, 61632017, 61872240, 62072297"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,2,28]]},"DOI":"10.1145\/3503222.3507721","type":"proceedings-article","created":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T20:49:01Z","timestamp":1645562941000},"page":"570-582","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":21,"title":["Astraea: towards QoS-aware and resource-efficient multi-stage GPU services"],"prefix":"10.1145","author":[{"given":"Wei","family":"Zhang","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5832-0347","authenticated-orcid":false,"given":"Quan","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China"}]},{"given":"Kaihua","family":"Fu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China"}]},{"given":"Ningxin","family":"Zheng","sequence":"additional","affiliation":[{"name":"Microsoft Research, China"}]},{"given":"Zhiyi","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Otago, New Zealand"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5660-5493","authenticated-orcid":false,"given":"Jingwen","family":"Leng","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0034-2302","authenticated-orcid":false,"given":"Minyi","family":"Guo","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China"}]}],"member":"320","published-online":{"date-parts":[[2022,2,22]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. AIBench: A Datacenter AI Benchmark Suite. https:\/\/www.benchcouncil.org\/AIBench\/"},{"key":"e_1_3_2_1_2_1","unstructured":"[n. d.]. Apache Thrift.. https:\/\/thrift.apache.org."},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. Automatic Alternative Text.. https:\/\/wordpress.org\/plugins\/automatic-alternative-text\/"},{"key":"e_1_3_2_1_4_1","unstructured":"[n. d.]. Best Caption For Facebook and Instagram.. https:\/\/play.google.com\/store\/apps\/details?id=com.caption.facebook.instagram&hl=en_US"},{"key":"e_1_3_2_1_5_1","unstructured":"[n. d.]. bgfx - Cross-platform rendering library. https:\/\/github.com\/bkaradzic\/bgfx"},{"key":"e_1_3_2_1_6_1","unstructured":"[n. d.]. Caption AI :Captions and Hashtags for Instagram\/FB.. https:\/\/play.google.com\/store\/apps\/details?id=caption.ai&hl=en_US"},{"key":"e_1_3_2_1_7_1","unstructured":"[n. d.]. The datacenter has an appetite for GPU compute.. https:\/\/www.nextplatform.com\/2020\/02\/15\/the-datacenter-has-an-appetite-for-gpu-compute\/"},{"key":"e_1_3_2_1_8_1","unstructured":"[n. d.]. Facial recognition api for Python. github.com\/ageitgey\/face_recognition"},{"key":"e_1_3_2_1_9_1","unstructured":"[n. d.]. FFmpeg:A complete cross-platform solution to record convert and stream audio and video.. https:\/\/ffmpeg.org\/"},{"key":"e_1_3_2_1_10_1","unstructured":"[n. d.]. GPU in AI & Machine Learning Use Cases.. https:\/\/www.weka.io\/blog\/gpu-for-ai-ml-deep-learning\/"},{"key":"e_1_3_2_1_11_1","unstructured":"[n. d.]. gRPC. https:\/\/www.grpc.io\/"},{"key":"e_1_3_2_1_12_1","unstructured":"[n. d.]. Moonlight. https:\/\/moonlight-stream.org\/"},{"key":"e_1_3_2_1_13_1","unstructured":"[n. d.]. NVIDIA CUDAAPI. https:\/\/docs.nvidia.com\/cuda\/cuda-runtime-api\/group__CUDART__DEVICE.html"},{"key":"e_1_3_2_1_14_1","unstructured":"[n. d.]. NVIDIA DALI. https:\/\/github.com\/NVIDIA\/DALI"},{"key":"e_1_3_2_1_15_1","unstructured":"[n. d.]. Nvidia Night Compute. docs.nvidia.com\/nsight-compute\/NsightCompute\/index.html"},{"key":"e_1_3_2_1_16_1","unstructured":"[n. d.]. OpenNMT: An open source neural machine translation system. opennmt.net\/"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00010"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2901467"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.2200\/S00516ED2V01Y201306CAC024"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11749-016-0481-7"},{"key":"e_1_3_2_1_21_1","unstructured":"Tom B Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry and Amanda Askell. 2020. Language models are few-shot learners. arXiv preprint arXiv:2005.14165."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018748"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037700"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872368"},{"key":"e_1_3_2_1_26_1","unstructured":"Yutian Chen Yannis Assael Brendan Shillingford David Budden Scott Reed Heiga Zen Quan Wang Luis C Cobo Andrew Trask and Ben Laurie. 2018. Sample efficient adaptive text-to-speech. arXiv preprint arXiv:1809.10460."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683550"},{"key":"e_1_3_2_1_28_1","volume-title":"Clipper: A low-latency online prediction serving system. In 14th $USENIX$ Symposium on Networked Systems Design and Implementation ($NSDI$ 17). 613\u2013627.","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J Franklin, Joseph E Gonzalez, and Ion Stoica. 2017. Clipper: A low-latency online prediction serving system. In 14th $USENIX$ Symposium on Networked Systems Design and Implementation ($NSDI$ 17). 613\u2013627."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476143"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2408776.2408794"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_25"},{"key":"e_1_3_2_1_34_1","volume-title":"Ant colony optimization","author":"Dorigo Marco","year":"2006","unstructured":"Marco Dorigo, Mauro Birattari, and Thomas Stutzle. 2006. Ant colony optimization. IEEE computational intelligence magazine, 1, 4 (2006), 28\u201339."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.37"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00102"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","unstructured":"Yu Gan Yanqi Zhang Dailun Cheng Ankitha Shetty Priyal Rathi Nayan Katarki Ariana Bruno Justin Hu Brian Ritchken and Brendon Jackson. 2019. An Open-Source Benchmark Suite for Microservices and Their Hardware-Software Implications for Cloud & Edge Systems. In the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). 3\u201318. https:\/\/doi.org\/10.1145\/3297858.3304013 10.1145\/3297858.3304013","DOI":"10.1145\/3297858.3304013"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304004"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","unstructured":"Lianli Gao Daiyuan Chen Jingkuan Song Xing Xu Dongxiang Zhang and Heng Tao Shen. 2019. Perceptual Pyramid Adversarial Networks for Text-to-Image Synthesis. https:\/\/doi.org\/10.1609\/aaai.v33i01.33018312 10.1609\/aaai.v33i01.33018312","DOI":"10.1609\/aaai.v33i01.33018312"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190541"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2019.00197"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2806777.2806942"},{"volume-title":"14th $USENIX$ Symposium on Operating Systems Design and Implementation ($OSDI$ 20). 443\u2013462.","author":"Gujarati Arpan","key":"e_1_3_2_1_43_1","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like clockwork: Performance predictability from the bottom up. In 14th $USENIX$ Symposium on Operating Systems Design and Implementation ($OSDI$ 20). 443\u2013462."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683343"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303958"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_49_1","volume-title":"Proc. USENIX ATC. 17\u201330","author":"Kato Shinpei","year":"2011","unstructured":"Shinpei Kato, Karthik Lakshmanan, Raj Rajkumar, and Yutaka Ishikawa. 2011. TimeGraph: GPU scheduling for real-time multi-tasking environments. In Proc. USENIX ATC. 17\u201330."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2019.00017"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2019.07.008"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307681.3325409"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","unstructured":"Yang Liu and Mirella Lapata. 2019. Text summarization with pretrained encoders. arXiv preprint arXiv:1908.08345 https:\/\/doi.org\/10.18653\/v1\/D19-1387 10.18653\/v1\/D19-1387","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_54_1","unstructured":"Yuchen Liu Jiajun Zhang Hao Xiong Long Zhou Zhongjun He Hua Wu Haifeng Wang and Chengqing Zong. 2019. Synchronous Speech Recognition and Speech-to-Text Translation with Interactive Decoding. arXiv preprint arXiv:1912.07240."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)0733-9364(2003)129:4(412)"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807184"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.13"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00023"},{"key":"e_1_3_2_1_59_1","unstructured":"NVIDIA. 2017. NVIDIA TESLA V100 GPU ARCHITECTURE.. https:\/\/www.nvidia.com\/en-us\/data-center\/nvidia-ampere-gpu-architecture\/"},{"key":"e_1_3_2_1_60_1","unstructured":"NVIDIA. 2019. NVIDIA DGX-2 System User Guide.. docs.nvidia.com\/dgx\/dgx2-user-guide\/index.html"},{"key":"e_1_3_2_1_61_1","volume-title":"Clarinet: Parallel wave generation in end-to-end text-to-speech. arXiv preprint arXiv:1807.07281.","author":"Ping Wei","year":"2018","unstructured":"Wei Ping, Kainan Peng, and Jitong Chen. 2018. Clarinet: Parallel wave generation in end-to-end text-to-speech. arXiv preprint arXiv:1807.07281."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/2632216"},{"key":"e_1_3_2_1_63_1","volume-title":"FIRM: An Intelligent Fine-grained Resource Management Framework for SLO-Oriented Microservices. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI). 805\u2013825","author":"Qiu Haoran","year":"2020","unstructured":"Haoran Qiu, Subho S Banerjee, Saurabh Jha, Zbigniew T Kalbarczyk, and Ravishankar K Iyer. 2020. FIRM: An Intelligent Fine-grained Resource Management Framework for SLO-Oriented Microservices. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI). 805\u2013825."},{"key":"e_1_3_2_1_64_1","unstructured":"Alec Radford Luke Metz and Soumith Chintala. 2015. Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434."},{"key":"e_1_3_2_1_65_1","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter J Liu. 2019. Exploring the limits of transfer learning with a unified text-to-text transformer. arXiv preprint arXiv:1910.10683."},{"key":"e_1_3_2_1_66_1","unstructured":"Scott Reed Zeynep Akata Xinchen Yan Lajanugen Logeswaran Bernt Schiele and Honglak Lee. 2016. Generative adversarial text to image synthesis. arXiv preprint arXiv:1605.05396."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"crossref","unstructured":"Ha\u015fim Sak Andrew Senior and Fran\u00e7oise Beaufays. 2014. Long short-term memory recurrent neural network architectures for large scale acoustic modeling. In the Fifteenth annual conference of the international speech communication association.","DOI":"10.21437\/Interspeech.2014-80"},{"volume-title":"Linear regression analysis. 329","author":"Seber George AF","key":"e_1_3_2_1_68_1","unstructured":"George AF Seber and Alan J Lee. 2012. Linear regression analysis. 329, John Wiley & Sons."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"key":"e_1_3_2_1_70_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322227"},{"volume-title":"Simulated annealing: Theory and applications","author":"Van Laarhoven Peter JM","key":"e_1_3_2_1_72_1","unstructured":"Peter JM Van Laarhoven and Emile HL Aarts. 1987. Simulated annealing. In Simulated annealing: Theory and applications. Springer, 7\u201315."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_47"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2920131"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00243"},{"key":"e_1_3_2_1_78_1","volume-title":"Salus: Fine-grained gpu sharing primitives for deep learning applications. arXiv preprint arXiv:1902.04610.","author":"Yu Peifeng","year":"2019","unstructured":"Peifeng Yu and Mosharaf Chowdhury. 2019. Salus: Fine-grained gpu sharing primitives for deep learning applications. arXiv preprint arXiv:1902.04610."},{"key":"e_1_3_2_1_79_1","volume-title":"G-net: Effective $GPU$ sharing in $NFV$ systems. In 15th $USENIX$ Symposium on Networked Systems Design and Implementation ($NSDI$ 18). 187\u2013200.","author":"Zhang Kai","year":"2018","unstructured":"Kai Zhang, Bingsheng He, Jiayu Hu, Zeke Wang, Bei Hua, Jiayi Meng, and Lishan Yang. 2018. G-net: Effective $GPU$ sharing in $NFV$ systems. In 15th $USENIX$ Symposium on Networked Systems Design and Implementation ($NSDI$ 18). 187\u2013200."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330351"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD53106.2021.00056"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446693"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00059"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00595"}],"event":{"name":"ASPLOS '22: 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"Lausanne Switzerland","acronym":"ASPLOS '22"},"container-title":["Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503222.3507721","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503222.3507721","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:11:39Z","timestamp":1750191099000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503222.3507721"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,22]]},"references-count":84,"alternative-id":["10.1145\/3503222.3507721","10.1145\/3503222"],"URL":"https:\/\/doi.org\/10.1145\/3503222.3507721","relation":{},"subject":[],"published":{"date-parts":[[2022,2,22]]},"assertion":[{"value":"2022-02-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}