{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T01:14:42Z","timestamp":1780708482040,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":73,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,11,1]],"date-time":"2021-11-01T00:00:00Z","timestamp":1635724800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,11]]},"DOI":"10.1145\/3472883.3486987","type":"proceedings-article","created":{"date-parts":[[2021,10,27]],"date-time":"2021-10-27T10:48:16Z","timestamp":1635331696000},"page":"639-653","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":45,"title":["Morphling"],"prefix":"10.1145","author":[{"given":"Luping","family":"Wang","sequence":"first","affiliation":[{"name":"HKUST, Alibaba Group"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lingyun","family":"Yang","sequence":"additional","affiliation":[{"name":"HKUST, Alibaba Group"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yinghao","family":"Yu","sequence":"additional","affiliation":[{"name":"Alibaba Group, HKUST"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[{"name":"HKUST"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"HKUST"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xianchao","family":"Sun","sequence":"additional","affiliation":[{"name":"Alibaba Group"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jian","family":"He","sequence":"additional","affiliation":[{"name":"Alibaba Group"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Liping","family":"Zhang","sequence":"additional","affiliation":[{"name":"Alibaba Group"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2021,11]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2021. Custom Resources. https:\/\/kubernetes.io\/docs\/concepts\/extend-kubernetes\/api-extension\/custom-resources\/.  2021. Custom Resources. https:\/\/kubernetes.io\/docs\/concepts\/extend-kubernetes\/api-extension\/custom-resources\/."},{"key":"e_1_3_2_2_2_1","unstructured":"2021. Deliver high performance ML inference with AWS Inferentia. https:\/\/d1.awsstatic.com\/events\/reinvent\/2019\/REPEAT_1_Deliver_high_performance_ML_inference_with_AWS_Inferentia_CMP324-R1.pdf.  2021. Deliver high performance ML inference with AWS Inferentia. https:\/\/d1.awsstatic.com\/events\/reinvent\/2019\/REPEAT_1_Deliver_high_performance_ML_inference_with_AWS_Inferentia_CMP324-R1.pdf."},{"key":"e_1_3_2_2_3_1","unstructured":"2021. Docker. https:\/\/www.docker.com.  2021. Docker. https:\/\/www.docker.com."},{"key":"e_1_3_2_2_4_1","unstructured":"2021. Httperf. https:\/\/github.com\/httperf\/httperf.  2021. Httperf. https:\/\/github.com\/httperf\/httperf."},{"key":"e_1_3_2_2_5_1","unstructured":"2021. Jmeter. https:\/\/jmeter.apache.org\/.  2021. Jmeter. https:\/\/jmeter.apache.org\/."},{"key":"e_1_3_2_2_6_1","unstructured":"2021. Kubernetes: Production-Grade Container Orchestration. https:\/\/kubernetes.io\/.  2021. Kubernetes: Production-Grade Container Orchestration. https:\/\/kubernetes.io\/."},{"key":"e_1_3_2_2_7_1","unstructured":"2021. Machine Learning on AWS. https:\/\/aws.amazon.com\/machine-learning.  2021. Machine Learning on AWS. https:\/\/aws.amazon.com\/machine-learning."},{"key":"e_1_3_2_2_8_1","unstructured":"2021. Module: Tensorflow Keras Applications. https:\/\/www.tensorflow.org\/api_docs\/python\/tf\/keras\/applications.  2021. Module: Tensorflow Keras Applications. https:\/\/www.tensorflow.org\/api_docs\/python\/tf\/keras\/applications."},{"key":"e_1_3_2_2_9_1","unstructured":"2021. NVIDIA Data Center Deep Learning Product Performance. https:\/\/developer.nvidia.com\/deep-learning-performance-training-inference.  2021. NVIDIA Data Center Deep Learning Product Performance. https:\/\/developer.nvidia.com\/deep-learning-performance-training-inference."},{"key":"e_1_3_2_2_10_1","unstructured":"2021. NVIDIA TensorRT Inference Server. https:\/\/github.com\/triton-inference-server\/server.  2021. NVIDIA TensorRT Inference Server. https:\/\/github.com\/triton-inference-server\/server."},{"key":"e_1_3_2_2_11_1","unstructured":"2021. NVIDIA TESLA M60 GPU ACCELERATOR. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/design-visualization\/solutions\/resources\/documents1\/nvidia-m60-datasheet.pdf.  2021. NVIDIA TESLA M60 GPU ACCELERATOR. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/design-visualization\/solutions\/resources\/documents1\/nvidia-m60-datasheet.pdf."},{"key":"e_1_3_2_2_12_1","unstructured":"2021. Nvidia Virtual GPU Technology. https:\/\/www.nvidia.com\/en-us\/data-center\/virtual-gpu-technology\/.  2021. Nvidia Virtual GPU Technology. https:\/\/www.nvidia.com\/en-us\/data-center\/virtual-gpu-technology\/."},{"key":"e_1_3_2_2_13_1","unstructured":"2021. Redis: an open source in-memory data structure store. https:\/\/redis.io.  2021. Redis: an open source in-memory data structure store. https:\/\/redis.io."},{"key":"e_1_3_2_2_14_1","unstructured":"2021. Siege. https:\/\/www.joedog.org\/siege-home\/.  2021. Siege. https:\/\/www.joedog.org\/siege-home\/."},{"key":"e_1_3_2_2_15_1","unstructured":"2021. TensorFlow Hub. https:\/\/tfhub.dev\/.  2021. TensorFlow Hub. https:\/\/tfhub.dev\/."},{"key":"e_1_3_2_2_16_1","unstructured":"2021. TensorFlow Serving for model deployment in production. https:\/\/www.tensorflow.org\/serving\/.  2021. TensorFlow Serving for model deployment in production. https:\/\/www.tensorflow.org\/serving\/."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2556195.2556252"},{"key":"e_1_3_2_2_18_1","volume-title":"Proc. USENIX","author":"Alipourfard Omid","year":"2017","unstructured":"Omid Alipourfard , Hongqiang Harry Liu , Jianshu Chen , Shivaram Venkataraman , Minlan Yu , and Ming Zhang . 2017 . Cherrypick: Adaptively unearthing the best cloud configurations for big data analytics . In Proc. USENIX , 2017. Omid Alipourfard, Hongqiang Harry Liu, Jianshu Chen, Shivaram Venkataraman, Minlan Yu, and Ming Zhang. 2017. Cherrypick: Adaptively unearthing the best cloud configurations for big data analytics. In Proc. USENIX, 2017."},{"key":"e_1_3_2_2_19_1","volume-title":"Neural networks: Tricks of the trade","author":"Bottou L\u00e9on","unstructured":"L\u00e9on Bottou . 2012. Stochastic gradient descent tricks . In Neural networks: Tricks of the trade . Springer , 421--436. L\u00e9on Bottou. 2012. Stochastic gradient descent tricks. In Neural networks: Tricks of the trade. Springer, 421--436."},{"key":"e_1_3_2_2_20_1","volume-title":"Noah Constant, Mario Guajardo-C\u00e9spedes, Steve Yuan, Chris Tar, et al.","author":"Cer Daniel","year":"2018","unstructured":"Daniel Cer , Yinfei Yang , Sheng-yi Kong, Nan Hua , Nicole Limtiaco , Rhomni St John , Noah Constant, Mario Guajardo-C\u00e9spedes, Steve Yuan, Chris Tar, et al. 2018 . Universal sentence encoder. arXiv preprint arXiv:1803.11175 (2018). Daniel Cer, Yinfei Yang, Sheng-yi Kong, Nan Hua, Nicole Limtiaco, Rhomni St John, Noah Constant, Mario Guajardo-C\u00e9spedes, Steve Yuan, Chris Tar, et al. 2018. Universal sentence encoder. arXiv preprint arXiv:1803.11175 (2018)."},{"key":"e_1_3_2_2_21_1","volume-title":"Proc. USENIX NSDI","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw , Xin Wang , Guilio Zhou , Michael J Franklin , Joseph E Gonzalez , and Ion Stoica . 2017 . Clipper: A low-latency online prediction serving system . In Proc. USENIX NSDI , 2017. Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J Franklin, Joseph E Gonzalez, and Ion Stoica. 2017. Clipper: A low-latency online prediction serving system. In Proc. USENIX NSDI, 2017."},{"key":"e_1_3_2_2_22_1","volume-title":"Proc. ACM SIGKDD","author":"Dalessandro Brian","year":"2014","unstructured":"Brian Dalessandro , Daizhuo Chen , Troy Raeder , Claudia Perlich , Melinda Han Williams , and Foster Provost . 2014 . Scalable handsfree transfer learning for online advertising . In Proc. ACM SIGKDD , 2014. Brian Dalessandro, Daizhuo Chen, Troy Raeder, Claudia Perlich, Melinda Han Williams, and Foster Provost. 2014. Scalable handsfree transfer learning for online advertising. In Proc. ACM SIGKDD, 2014."},{"key":"e_1_3_2_2_23_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2018 . Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018). Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2168836.2168847"},{"key":"e_1_3_2_2_25_1","volume-title":"Proc. PMLR ICML","author":"Finn Chelsea","year":"2017","unstructured":"Chelsea Finn , Pieter Abbeel , and Sergey Levine . 2017 . Model-agnostic meta-learning for fast adaptation of deep networks . In Proc. PMLR ICML , 2017. Chelsea Finn, Pieter Abbeel, and Sergey Levine. 2017. Model-agnostic meta-learning for fast adaptation of deep networks. In Proc. PMLR ICML, 2017."},{"key":"e_1_3_2_2_26_1","volume-title":"A tutorial on Bayesian optimization. arXiv preprint arXiv:1807.02811","author":"Frazier Peter I","year":"2018","unstructured":"Peter I Frazier . 2018. A tutorial on Bayesian optimization. arXiv preprint arXiv:1807.02811 ( 2018 ). Peter I Frazier. 2018. A tutorial on Bayesian optimization. arXiv preprint arXiv:1807.02811 (2018)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098043"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3135974.3135993"},{"key":"e_1_3_2_2_29_1","volume-title":"Proc. USENIX OSDI","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati , Reza Karimi , Safya Alzayat , Wei Hao , Antoine Kaufmann , Ymir Vigfusson , and Jonathan Mace . 2020 . Serving DNNs like Clockwork: Performance Predictability from the Bottom Up . In Proc. USENIX OSDI , 2020. Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like Clockwork: Performance Predictability from the Bottom Up. In Proc. USENIX OSDI, 2020."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11788"},{"key":"e_1_3_2_2_34_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861","author":"Howard Andrew G","year":"2017","unstructured":"Andrew G Howard , Menglong Zhu , Bo Chen , Dmitry Kalenichenko , Weijun Wang , Tobias Weyand , Marco Andreetto , and Hartwig Adam . 2017 . Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017). Andrew G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2018.00070"},{"key":"e_1_3_2_2_36_1","volume-title":"Scout: An experienced guide to find the best cloud configuration. arXiv preprint arXiv:1803.01296","author":"Hsu Chin-Jung","year":"2018","unstructured":"Chin-Jung Hsu , Vivek Nair , Tim Menzies , and Vincent W Freeh . 2018 . Scout: An experienced guide to find the best cloud configuration. arXiv preprint arXiv:1803.01296 (2018). Chin-Jung Hsu, Vivek Nair, Tim Menzies, and Vincent W Freeh. 2018. Scout: An experienced guide to find the best cloud configuration. arXiv preprint arXiv:1803.01296 (2018)."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_39"},{"key":"e_1_3_2_2_38_1","volume-title":"Alexey Tumanov, Joseph Gonzalez, and Ion Stoica.","author":"Jain Paras","year":"2018","unstructured":"Paras Jain , Xiangxi Mo , Ajay Jain , Harikaran Subbaraj , Rehan Sohail Durrani , Alexey Tumanov, Joseph Gonzalez, and Ion Stoica. 2018 . Dynamic space-time scheduling for gpu inference. arXiv preprint arXiv:1901.00041 (2018). Paras Jain, Xiangxi Mo, Ajay Jain, Harikaran Subbaraj, Rehan Sohail Durrani, Alexey Tumanov, Joseph Gonzalez, and Ion Stoica. 2018. Dynamic space-time scheduling for gpu inference. arXiv preprint arXiv:1901.00041 (2018)."},{"key":"e_1_3_2_2_39_1","volume-title":"Dissecting the NVidia Turing T4 GPU via microbenchmarking. arXiv preprint arXiv:1903.07486","author":"Jia Zhe","year":"2019","unstructured":"Zhe Jia , Marco Maggioni , Jeffrey Smith , and Daniele Paolo Scarpazza . 2019. Dissecting the NVidia Turing T4 GPU via microbenchmarking. arXiv preprint arXiv:1903.07486 ( 2019 ). Zhe Jia, Marco Maggioni, Jeffrey Smith, and Daniele Paolo Scarpazza. 2019. Dissecting the NVidia Turing T4 GPU via microbenchmarking. arXiv preprint arXiv:1903.07486 (2019)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230574"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2018.2889042"},{"key":"e_1_3_2_2_42_1","volume-title":"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan , Mingda Chen , Sebastian Goodman , Kevin Gimpel , Piyush Sharma , and Radu Soricut . 2019 . ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. arXiv preprint arXiv:1909.11942 (2019). Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. arXiv preprint arXiv:1909.11942 (2019)."},{"key":"e_1_3_2_2_43_1","volume-title":"Markus Weimer, and Matteo Interlandi.","author":"Lee Yunseong","year":"2018","unstructured":"Yunseong Lee , Alberto Scolari , Byung-Gon Chun , Marco Domenico Santambrogio , Markus Weimer, and Matteo Interlandi. 2018 . PRETZEL : Opening the black box of machine learning prediction serving systems. In USENIX OSDI , 2018. Yunseong Lee, Alberto Scolari, Byung-Gon Chun, Marco Domenico Santambrogio, Markus Weimer, and Matteo Interlandi. 2018. PRETZEL: Opening the black box of machine learning prediction serving systems. In USENIX OSDI, 2018."},{"key":"e_1_3_2_2_44_1","volume-title":"Meta-sgd: Learning to learn quickly for few-shot learning. arXiv preprint arXiv:1707.09835","author":"Li Zhenguo","year":"2017","unstructured":"Zhenguo Li , Fengwei Zhou , Fei Chen , and Hang Li . 2017 . Meta-sgd: Learning to learn quickly for few-shot learning. arXiv preprint arXiv:1707.09835 (2017). Zhenguo Li, Fengwei Zhou, Fei Chen, and Hang Li. 2017. Meta-sgd: Learning to learn quickly for few-shot learning. arXiv preprint arXiv:1707.09835 (2017)."},{"key":"e_1_3_2_2_45_1","volume-title":"Christine Cheng, Cody Coleman, Greg Diamos, David Kanter, Paulius Micikevicius, David Patterson, Guenther Schmuelling, Hanlin Tang, et al.","author":"Mattson Peter","year":"2020","unstructured":"Peter Mattson , Vijay Janapa Reddi , Christine Cheng, Cody Coleman, Greg Diamos, David Kanter, Paulius Micikevicius, David Patterson, Guenther Schmuelling, Hanlin Tang, et al. 2020 . MLPerf: An industry standard benchmark suite for machine learning performance. IEEE Micro , 2020 40, 2 (2020), 8--16. Peter Mattson, Vijay Janapa Reddi, Christine Cheng, Cody Coleman, Greg Diamos, David Kanter, Paulius Micikevicius, David Patterson, Guenther Schmuelling, Hanlin Tang, et al. 2020. MLPerf: An industry standard benchmark suite for machine learning performance. IEEE Micro, 2020 40, 2 (2020), 8--16."},{"key":"e_1_3_2_2_46_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov , Kai Chen , Greg Corrado , and Jeffrey Dean . 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 ( 2013 ). Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_2_47_1","volume-title":"Learning search spaces for bayesian optimization: Another view of hyperparameter transfer learning. arXiv preprint arXiv:1909.12552","author":"Perrone Valerio","year":"2019","unstructured":"Valerio Perrone , Huibin Shen , Matthias Seeger , Cedric Archambeau , and Rodolphe Jenatton . 2019. Learning search spaces for bayesian optimization: Another view of hyperparameter transfer learning. arXiv preprint arXiv:1909.12552 ( 2019 ). Valerio Perrone, Huibin Shen, Matthias Seeger, Cedric Archambeau, and Rodolphe Jenatton. 2019. Learning search spaces for bayesian optimization: Another view of hyperparameter transfer learning. arXiv preprint arXiv:1909.12552 (2019)."},{"key":"e_1_3_2_2_48_1","article-title":"Scanner: Efficient video analysis at scale","volume":"2018","author":"Poms Alex","year":"2018","unstructured":"Alex Poms , Will Crichton , Pat Hanrahan , and Kayvon Fatahalian . 2018 . Scanner: Efficient video analysis at scale . ACM Trans. Graph. , 2018 37, 4 (2018). Alex Poms, Will Crichton, Pat Hanrahan, and Kayvon Fatahalian. 2018. Scanner: Efficient video analysis at scale. ACM Trans. Graph., 2018 37, 4 (2018).","journal-title":"ACM Trans. Graph."},{"key":"e_1_3_2_2_49_1","volume-title":"Proc. PMLR ICML","author":"Rana Santu","year":"2017","unstructured":"Santu Rana , Cheng Li , Sunil Gupta , Vu Nguyen , and Svetha Venkatesh . 2017 . High dimensional Bayesian optimization with elastic Gaussian process . In Proc. PMLR ICML , 2017. Santu Rana, Cheng Li, Sunil Gupta, Vu Nguyen, and Svetha Venkatesh. 2017. High dimensional Bayesian optimization with elastic Gaussian process. In Proc. PMLR ICML, 2017."},{"key":"e_1_3_2_2_50_1","volume-title":"Proc. USENIX ATC","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero , Qian Li , Neeraja J Yadwadkar , and Christos Kozyrakis . 2021 . INFaaS: Automated Model-less Inference Serving . In Proc. USENIX ATC , 2021. Francisco Romero, Qian Li, Neeraja J Yadwadkar, and Christos Kozyrakis. 2021. INFaaS: Automated Model-less Inference Serving. In Proc. USENIX ATC, 2021."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387524"},{"key":"e_1_3_2_2_52_1","volume-title":"CUDA by example: an introduction to general-purpose GPU programming","author":"Sanders Jason","unstructured":"Jason Sanders and Edward Kandrot . 2010. CUDA by example: an introduction to general-purpose GPU programming . Addison-Wesley Professional . Jason Sanders and Edward Kandrot. 2010. CUDA by example: an introduction to general-purpose GPU programming. Addison-Wesley Professional."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2014.2303136"},{"key":"e_1_3_2_2_55_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 ( 2014 ). Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_2_56_1","volume-title":"Practical bayesian optimization of machine learning algorithms. arXiv preprint arXiv:1206.2944","author":"Snoek Jasper","year":"2012","unstructured":"Jasper Snoek , Hugo Larochelle , and Ryan P Adams . 2012. Practical bayesian optimization of machine learning algorithms. arXiv preprint arXiv:1206.2944 ( 2012 ). Jasper Snoek, Hugo Larochelle, and Ryan P Adams. 2012. Practical bayesian optimization of machine learning algorithms. arXiv preprint arXiv:1206.2944 (2012)."},{"key":"e_1_3_2_2_57_1","unstructured":"Richard Socher. 2014. Recursive deep learning for natural language processing and computer vision. Citeseer.  Richard Socher. 2014. Recursive deep learning for natural language processing and computer vision. Citeseer."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"key":"e_1_3_2_2_59_1","volume-title":"International Conference on Machine Learning. PMLR.","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le . 2019 . Efficientnet: Rethinking model scaling for convolutional neural networks . In International Conference on Machine Learning. PMLR. Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International Conference on Machine Learning. PMLR."},{"key":"e_1_3_2_2_60_1","volume-title":"Proc. PMLR ICCV","author":"Teshima Takeshi","year":"2020","unstructured":"Takeshi Teshima , Issei Sato , and Masashi Sugiyama . 2020 . Few-shot domain adaptation by causal mechanism transfer . In Proc. PMLR ICCV , 2020. Takeshi Teshima, Issei Sato, and Masashi Sugiyama. 2020. Few-shot domain adaptation by causal mechanism transfer. In Proc. PMLR ICCV, 2020."},{"key":"e_1_3_2_2_61_1","volume-title":"Proc. USENIX NSDI","author":"Venkataraman Shivaram","year":"2016","unstructured":"Shivaram Venkataraman , Zongheng Yang , Michael Franklin , Benjamin Recht , and Ion Stoica . 2016 . Ernest: Efficient performance prediction for large-scale advanced analytics . In Proc. USENIX NSDI , 2016. Shivaram Venkataraman, Zongheng Yang, Michael Franklin, Benjamin Recht, and Ion Stoica. 2016. Ernest: Efficient performance prediction for large-scale advanced analytics. In Proc. USENIX NSDI, 2016."},{"key":"e_1_3_2_2_62_1","volume-title":"Beng Chin Ooi, Jie Shao, and Moaz Reyad.","author":"Wang Wei","year":"2018","unstructured":"Wei Wang , Jinyang Gao , Meihui Zhang , Sheng Wang , Gang Chen , Teck Khim Ng , Beng Chin Ooi, Jie Shao, and Moaz Reyad. 2018 . Rafiki: machine learning as an analytics service system. VLDB Endowment , 2018 12, 2 (2018). Wei Wang, Jinyang Gao, Meihui Zhang, Sheng Wang, Gang Chen, Teck Khim Ng, Beng Chin Ooi, Jie Shao, and Moaz Reyad. 2018. Rafiki: machine learning as an analytics service system. VLDB Endowment, 2018 12, 2 (2018)."},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_37"},{"key":"e_1_3_2_2_64_1","volume-title":"Proc. ACM\/IEEE ISCA","author":"Wang Zhenning","year":"2017","unstructured":"Zhenning Wang , Jun Yang , Rami Melhem , Bruce Childers , Youtao Zhang , and Minyi Guo . 2017 . Quality of service support for finegrained sharing on GPUs . In Proc. ACM\/IEEE ISCA , 2017. Zhenning Wang, Jun Yang, Rami Melhem, Bruce Childers, Youtao Zhang, and Minyi Guo. 2017. Quality of service support for finegrained sharing on GPUs. In Proc. ACM\/IEEE ISCA, 2017."},{"key":"e_1_3_2_2_65_1","volume-title":"Proc. IJCAI","author":"Wang Ziyu","year":"2013","unstructured":"Ziyu Wang , Masrour Zoghi , Frank Hutter , David Matheson , Nando De Freitas , 2013 . Bayesian Optimization in High Dimensions via Random Embeddings .. In Proc. IJCAI , 2013. Ziyu Wang, Masrour Zoghi, Frank Hutter, David Matheson, Nando De Freitas, et al. 2013. Bayesian Optimization in High Dimensions via Random Embeddings.. In Proc. IJCAI, 2013."},{"key":"e_1_3_2_2_66_1","volume-title":"Proc. USENIX NSDI","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng , Wencong Xiao , Yinghao Yu , Wei Wang , Chen Wang , Jian He , Yong Li , Liping Zhang , Wei Lin , and Yu Ding . 2022 . MLaaS in the Wild: Workload Analysis and Scheduling in Large Heterogeneous GPU Clusters . In Proc. USENIX NSDI , 2022. Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Chen Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. MLaaS in the Wild: Workload Analysis and Scheduling in Large Heterogeneous GPU Clusters. In Proc. USENIX NSDI, 2022."},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3127479.3131614"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.25"},{"key":"e_1_3_2_2_69_1","volume-title":"Salus: Fine-grained gpu sharing primitives for deep learning applications. arXiv preprint arXiv:1902.04610","author":"Yu Peifeng","year":"2019","unstructured":"Peifeng Yu and Mosharaf Chowdhury . 2019 . Salus: Fine-grained gpu sharing primitives for deep learning applications. arXiv preprint arXiv:1902.04610 (2019). Peifeng Yu and Mosharaf Chowdhury. 2019. Salus: Fine-grained gpu sharing primitives for deep learning applications. arXiv preprint arXiv:1902.04610 (2019)."},{"key":"e_1_3_2_2_70_1","volume-title":"SLO-Aware Machine Learning Inference Serving. In Proc. USENIX ATC","author":"Zhang Chengliang","year":"2019","unstructured":"Chengliang Zhang , Minchen Yu , Wei Wang , and Feng Yan . 2019 . MArk: Exploiting Cloud Services for Cost-Effective , SLO-Aware Machine Learning Inference Serving. In Proc. USENIX ATC , 2019. Chengliang Zhang, Minchen Yu, Wei Wang, and Feng Yan. 2019. MArk: Exploiting Cloud Services for Cost-Effective, SLO-Aware Machine Learning Inference Serving. In Proc. USENIX ATC, 2019."},{"key":"e_1_3_2_2_71_1","volume-title":"Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578","author":"Zoph Barret","year":"2016","unstructured":"Barret Zoph and Quoc V Le. 2016. Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578 ( 2016 ). Barret Zoph and Quoc V Le. 2016. Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578 (2016)."},{"key":"e_1_3_2_2_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00907"},{"key":"e_1_3_2_2_73_1","unstructured":"Corey Zumar. 2018. InferLine: ML Inference Pipeline Composition Framework. (2018).  Corey Zumar. 2018. InferLine: ML Inference Pipeline Composition Framework. (2018)."}],"event":{"name":"SoCC '21: ACM Symposium on Cloud Computing","location":"Seattle WA USA","acronym":"SoCC '21","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3472883.3486987","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3472883.3486987","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:11:57Z","timestamp":1750191117000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3472883.3486987"}},"subtitle":["Fast, Near-Optimal Auto-Configuration for Cloud-Native Model Serving"],"short-title":[],"issued":{"date-parts":[[2021,11]]},"references-count":73,"alternative-id":["10.1145\/3472883.3486987","10.1145\/3472883"],"URL":"https:\/\/doi.org\/10.1145\/3472883.3486987","relation":{},"subject":[],"published":{"date-parts":[[2021,11]]},"assertion":[{"value":"2021-11-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}