{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:40:02Z","timestamp":1755776402005,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":94,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-sa\/4.0\/"}],"funder":[{"name":"IBM-Illinois Discovery Accelerator Institute","award":["107275"],"award-info":[{"award-number":["107275"]}]},{"name":"NSF CNS","award":["1908888"],"award-info":[{"award-number":["1908888"]}]},{"name":"NSF IIS","award":["1909577"],"award-info":[{"award-number":["1909577"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3689031.3696071","type":"proceedings-article","created":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T06:25:20Z","timestamp":1742970320000},"page":"524-540","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A House United Within Itself: SLO-Awareness for On-Premises Containerized ML Inference Clusters via Faro"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-4552-4660","authenticated-orcid":false,"given":"Beomyeol","family":"Jeon","sequence":"first","affiliation":[{"name":"University of Illinois, Urbana-Champaign, Urbana, Illinois, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0204-2362","authenticated-orcid":false,"given":"Chen","family":"Wang","sequence":"additional","affiliation":[{"name":"IBM Research, Yorktown Heights, New York, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2330-8986","authenticated-orcid":false,"given":"Diana","family":"Arroyo","sequence":"additional","affiliation":[{"name":"IBM Research, Yorktown Heights, New York, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5346-7331","authenticated-orcid":false,"given":"Alaa","family":"Youssef","sequence":"additional","affiliation":[{"name":"IBM Research, Yorktown Heights, New York, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9372-5937","authenticated-orcid":false,"given":"Indranil","family":"Gupta","sequence":"additional","affiliation":[{"name":"University of Illinois, Urbana-Champaign, Urbana, Illinois, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330701"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPDC.2006.1652143"},{"key":"e_1_3_2_1_4_1","volume-title":"2nd USENIX Workshop on Hot Topics in Management of Internet, Cloud, and Enterprise Networks and Services (Hot-ICE 12)","author":"Baset Salman A.","year":"2012","unstructured":"Salman A. Baset, Long Wang, and Chunqiang Tang. 2012. Towards an Understanding of Oversubscription in Cloud. In 2nd USENIX Workshop on Hot Topics in Management of Internet, Cloud, and Enterprise Networks and Services (Hot-ICE 12). USENIX Association, San Jose, CA. https:\/\/www.usenix.org\/conference\/hot-ice12\/workshop-program\/presentation\/baset"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Bhardwaj Romil","year":"2023","unstructured":"Romil Bhardwaj, Kirthevasan Kandasamy, Asim Biswal, Wenshuo Guo, Benjamin Hindman, Joseph Gonzalez, Michael Jordan, and Ion Stoica. 2023. Cilantro: Performance-Aware Resource Allocation for General Objectives via Online Feedback. In Proceedings of 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23). USENIX Association, Boston, MA, 623--643. https:\/\/www.usenix.org\/conference\/osdi23\/presentation\/bhardwaj"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E.2019.00-10"},{"key":"e_1_3_2_1_7_1","volume-title":"Apollo: Scalable and Coordinated Scheduling for Cloud-Scale Computing. In 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14)","author":"Boutin Eric","year":"2014","unstructured":"Eric Boutin, Jaliya Ekanayake, Wei Lin, Bing Shi, Jingren Zhou, Zhengping Qian, Ming Wu, and Lidong Zhou. 2014. Apollo: Scalable and Coordinated Scheduling for Cloud-Scale Computing. In 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14). USENIX Association, Broomfield, CO, 285--300. https:\/\/www.usenix.org\/conference\/osdi14\/technical-sessions\/presentation\/boutin"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i6.25854"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387555"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387555"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of 2022 Machine Learning and Systems (MLSys 22)","volume":"4","author":"Cho Junguk","year":"2022","unstructured":"Junguk Cho, Diman Zad Tootaghaj, Lianjie Cao, and Puneet Sharma. 2022. SLA-Driven ML Inference Framework for Clouds with Heterogeneous Accelerators. In Proceedings of 2022 Machine Learning and Systems (MLSys 22), D. Marculescu, Y. Chi, and C. Wu (Eds.), Vol. 4. Santa Clara, CA, USA, 20--32. https:\/\/proceedings.mlsys.org\/paper\/2022\/file\/0777d5c17d4066b82ab86dff8a46af6f-Paper.pdf"},{"volume-title":"Internetworking with TCP\/IP","author":"Comer D.","key":"e_1_3_2_1_12_1","unstructured":"D. Comer. 2000. Internetworking with TCP\/IP. Prentice Hall, Upper Saddle River, NJ, USA."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J. Franklin, Joseph E. Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency Online Prediction Serving System. In Proceedings of 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). USENIX Association, Boston, MA, 613--627. https:\/\/www.usenix.org\/conference\/nsdi17\/technical-sessions\/presentation\/crankshaw"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 27th International Conference on Neural Information Processing Systems -","volume":"2","author":"Dauphin Yann N.","year":"2014","unstructured":"Yann N. Dauphin, Razvan Pascanu, Caglar Gulcehre, Kyunghyun Cho, Surya Ganguli, and Yoshua Bengio. 2014. Identifying and Attacking the Saddle Point Problem in High-Dimensional Non-Convex Optimization. In Proceedings of the 27th International Conference on Neural Information Processing Systems - Volume 2 (NIPS 14). MIT Press, Montreal, Canada, 2933--2941."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499368.2451125"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541941"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3344341.3368805"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2168836.2168847"},{"key":"e_1_3_2_1_19_1","unstructured":"Cloud Native Computing Foundation. 2024. Kubernetes. https:\/\/kubernetes.io\/"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-"},{"key":"e_1_3_2_1_21_1","volume-title":"Democratizing Content Publication with Coral. In First Symposium on Networked Systems Design and Implementation (NSDI 04)","author":"Freedman Michael","year":"2004","unstructured":"Michael Freedman, Eric Freudenthal, and David Mazi\u00e8res. 2004. Democratizing Content Publication with Coral. In First Symposium on Networked Systems Design and Implementation (NSDI 04). USENIX Association, San Francisco, CA. https:\/\/www.usenix.org\/conference\/nsdi-04\/democratizing-content-publication-coral"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2009"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Fu Silvery","year":"2021","unstructured":"Silvery Fu, Saurabh Gupta, Radhika Mittal, and Sylvia Ratnasamy. 2021. On the Use of ML for Blackbox System Performance Prediction. In Proceedings of 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21). USENIX Association, Virtual Event, 763--784. https:\/\/www.usenix.org\/conference\/nsdi21\/presentation\/fu"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2382553.2382556"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-statistics-062713-085831"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2619239.2626334"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1592568.1592576"},{"volume-title":"Proceedings of the 18th ACM\/IFIP\/USENIX Middleware Conference (Middleware 17)","author":"Gujarati Arpan","key":"e_1_3_2_1_28_1","unstructured":"Arpan Gujarati, Sameh Elnikety, Yuxiong He, Kathryn S. McKinley, and Bj\u00f6rn B. Brandenburg. 2017. Swayam: Distributed Autoscaling to Meet SLAs of Machine Learning Inference Services with Resource Efficiency. In Proceedings of the 18th ACM\/IFIP\/USENIX Middleware Conference (Middleware 17). Association for Computing Machinery, Las Vegas, Nevada, USA, 109--120."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like Clockwork: Performance Predictability from the Bottom Up. In Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, Virtual Event, 443--462."},{"volume-title":"Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Gunasekaran Jashwant Raj","key":"e_1_3_2_1_30_1","unstructured":"Jashwant Raj Gunasekaran, Cyan Subhra Mishra, Prashanth Thinakaran, Bikash Sharma, Mahmut Taylan Kandemir, and Chita R. Das. 2022. Cocktail: A Multidimensional Optimization for Model Serving in Cloud. In Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). USENIX Association, Renton, WA, 1041--1057. https:\/\/www.usenix.org\/conference\/nsdi22\/presentation\/gunasekaran"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"e_1_3_2_1_32_1","unstructured":"Red Hat. 2024. Red Hat OpenShift. https:\/\/www.redhat.com\/en\/technologies\/cloud-computing\/openshift"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_34_1","first-page":"1","article-title":"Darts: User-Friendly Modern Machine Learning for Time Series","volume":"23","author":"Herzen Julien","year":"2022","unstructured":"Julien Herzen, Francesco L\u00e4ssig, Samuele Giuliano Piazzetta, Thomas Neuer, L\u00e9o Tafti, Guillaume Raille, Tomas Van Pottelbergh, Marek Pasieka, Andrzej Skrodzki, Nicolas Huguenin, Maxime Dumonal, Jan Ko\u015bcisz, Dennis Bader, Fr\u00e9d\u00e9rick Gusset, Mounir Benheddi, Camila Williamson, Michal Kosinski, Matej Petrik, and Ga\u00ebl Grosch. 2022. Darts: User-Friendly Modern Machine Learning for Time Series. Journal of Machine Learning Research 23, 124 (2022), 1--6. http:\/\/jmlr.org\/papers\/v23\/21-1177.html","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_35_1","unstructured":"Xinyu Hu Olcay Cirit Tanmay Binaykiya and Ramit Hora. 2022. DeepETA: How Uber Predicts Arrival Times Using Deep Learning https:\/\/www.uber.com\/blog\/deepeta-how-uber-predicts-arrival-times\/."},{"key":"e_1_3_2_1_36_1","unstructured":"IBM. 2024. IBM Cloud VPC Solutions. https:\/\/www.ibm.com\/cloud\/vpc"},{"key":"e_1_3_2_1_37_1","unstructured":"IBM. 2024. Service Level Agreements (SLAs) for IBM Cloud. https:\/\/cloud.ibm.com\/docs\/overview?topic=overview-slas"},{"key":"e_1_3_2_1_38_1","volume-title":"Morpheus: Towards Automated SLOs for Enterprise Clusters. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"Jyothi Sangeetha Abdu","year":"2016","unstructured":"Sangeetha Abdu Jyothi, Carlo Curino, Ishai Menache, Shravan Matthur Narayanamurthy, Alexey Tumanov, Jonathan Yaniv, Ruslan Mavlyutov, Inigo Goiri, Subru Krishnan, Janardhan Kulkarni, and Sriram Rao. 2016. Morpheus: Towards Automated SLOs for Enterprise Clusters. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16). USENIX Association, Savannah, GA, 117--134. https:\/\/www.usenix.org\/conference\/osdi16\/technical-sessions\/presentation\/jyothi"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267832"},{"key":"e_1_3_2_1_40_1","unstructured":"Kirthevasan Kandasamy Gur-Eyal Sela Joseph E Gonzalez Michael I Jordan and Ion Stoica. 2020. Online Learning Demands in Max-min Fairness. arXiv:2012.08648 [stat.ML]"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/30.1-2.81"},{"key":"e_1_3_2_1_42_1","unstructured":"Dieter Kraft. 1988. A software package for sequential quadratic programming Ein Software-Paket zur sequentiellen quadratischen Optimierung Forschungsbericht. Deutsche Forschungs- und Versuchsanstalt f\u00fcr Luft- und Raumfahrt DFVLR. Technical Report. Institut f\u00fcr Dynamik der Flugsysteme Deutsche Forschungs- und Versuchsanstalt f\u00fcr Luft- und Raumfahrt DFVLR Oberpfaffenhofen K\u00f6ln. https:\/\/www.tib.eu\/de\/suchen\/id\/TIBKAT%3A016896521"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901351"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2833157.2833162"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304044"},{"key":"e_1_3_2_1_46_1","volume-title":"Tune: A Research Platform for Distributed Model Selection and Training. arXiv:1807.05118 [cs.LG]","author":"Liaw Richard","year":"2018","unstructured":"Richard Liaw, Eric Liang, Robert Nishihara, Philipp Moritz, Joseph E. Gonzalez, and Ion Stoica. 2018. Tune: A Research Platform for Distributed Model Selection and Training. arXiv:1807.05118 [cs.LG]"},{"key":"e_1_3_2_1_47_1","volume-title":"Themis: Fair and Efficient GPU Cluster Scheduling. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20)","author":"Mahajan Kshiteej","year":"2020","unstructured":"Kshiteej Mahajan, Arjun Balasubramanian, Arjun Singhvi, Shivaram Venkataraman, Aditya Akella, Amar Phanishayee, and Shuchi Chawla. 2020. Themis: Fair and Efficient GPU Cluster Scheduling. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20). USENIX Association, Santa Clara, CA, 289--304. https:\/\/www.usenix.org\/conference\/nsdi20\/presentation\/mahajan"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1099-131X(199705)16:3&lt;147::AID-FOR652&gt;3.0.CO;2-X"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3005745.3005750"},{"key":"e_1_3_2_1_50_1","unstructured":"Dave McCarthy. 2023. Xbox Releases Second Transparency Report Demonstrating the Integral Role of Proactive Content Moderation https:\/\/news.xbox.com\/en-us\/2023\/05\/22\/xbox-releases-second-transparency-report\/."},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the 10th International Conference on Autonomic Computing (ICAC 13)","author":"Mickulicz Nathan D.","year":"2013","unstructured":"Nathan D. Mickulicz, Priya Narasimhan, and Rajeev Gandhi. 2013. To Auto Scale or Not to Auto Scale. In Proceedings of the 10th International Conference on Autonomic Computing (ICAC 13). USENIX Association, San Jose, CA, 145--151. https:\/\/www.usenix.org\/conference\/icac13\/technical-sessions\/presentation\/mickulicz"},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Moritz Philipp","year":"2018","unstructured":"Philipp Moritz, Robert Nishihara, Stephanie Wang, Alexey Tumanov, Richard Liaw, Eric Liang, Melih Elibol, Zongheng Yang, William Paul, Michael I. Jordan, and Ion Stoica. 2018. Ray: A Distributed Framework for Emerging AI Applications. In Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, USA, 561--577. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/moritz"},{"key":"e_1_3_2_1_53_1","volume-title":"Heterogeneity-Aware Cluster Scheduling Policies for Deep Learning Workloads. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Narayanan Deepak","year":"2020","unstructured":"Deepak Narayanan, Keshav Santhanam, Fiodar Kazhamiaka, Amar Phanishayee, and Matei Zaharia. 2020. Heterogeneity-Aware Cluster Scheduling Policies for Deep Learning Workloads. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, 481--498."},{"key":"e_1_3_2_1_54_1","unstructured":"Netapp. 2023. A little more talk a little more action. https:\/\/www.netapp.com\/media\/57081-NA-644-0721-More-talk-more-action.pdf"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cageo.2022.105126"},{"key":"e_1_3_2_1_56_1","volume-title":"High-Performance ML Serving. In Workshop on ML Systems at NIPS","author":"Olston Christopher","year":"2017","unstructured":"Christopher Olston, Fangwei Li, Jeremiah Harmsen, Jordan Soyke, Kiril Gorovoy, Li Lao, Noah Fiedel, Sukriti Ramesh, and Vinu Rajashekhar. 2017. TensorFlow-Serving: Flexible, High-Performance ML Serving. In Workshop on ML Systems at NIPS 2017. Long Beach, CA, USA."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522716"},{"key":"e_1_3_2_1_58_1","unstructured":"Razvan Pascanu Yann N. Dauphin Surya Ganguli and Yoshua Bengio. 2014. On the saddle point problem for non-convex optimization. arXiv:1405.4604 [cs.LG]"},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems (NeurIPS 19)","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Proceedings of the 33rd International Conference on Neural Information Processing Systems (NeurIPS 19). Curran Associates Inc., Vancouver, Canada, Article 721, 12 pages. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651329"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-015-8330-5_4"},{"key":"e_1_3_2_1_62_1","unstructured":"Mary K. Pratt. 2022. How Visa fights fraud. https:\/\/www.csoonline.com\/article\/573009\/how-visa-fights-fraud.html"},{"volume-title":"Pollux: Co-adaptive Cluster Scheduling for Goodput-Optimized Deep Learning. In 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21)","author":"Qiao Aurick","key":"e_1_3_2_1_63_1","unstructured":"Aurick Qiao, Sang Keun Choe, Suhas Jayaram Subramanya, Willie Neiswanger, Qirong Ho, Hao Zhang, Gregory R. Ganger, and Eric P. Xing. 2021. Pollux: Co-adaptive Cluster Scheduling for Goodput-Optimized Deep Learning. In 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21). USENIX Association, 1--18. https:\/\/www.usenix.org\/conference\/osdi21\/presentation\/qiao"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"e_1_3_2_1_66_1","volume-title":"Proceedings of the 2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J. Yadwadkar, and Christos Kozyrakis. 2021. INFaaS: Automated Model-less Inference Serving. In Proceedings of the 2021 USENIX Annual Technical Conference (USENIX ATC 21). USENIX Association, Virtual Event, 397--411."},{"key":"e_1_3_2_1_67_1","volume-title":"Artificial Intelligence: A Modern Approach","author":"Russell Stuart","year":"2009","unstructured":"Stuart Russell and Peter Norvig. 2009. Artificial Intelligence: A Modern Approach (3rd ed.). Prentice Hall Press, USA.","edition":"3"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387524"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid51090.2021.00098"},{"key":"e_1_3_2_1_70_1","unstructured":"Amazon Web Services. 2024. AWS Service Level Agreements (SLAs). https:\/\/aws.amazon.com\/legal\/service-level-agreements"},{"key":"e_1_3_2_1_71_1","volume-title":"Proceedings of the 2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Shahrad Mohammad","year":"2020","unstructured":"Mohammad Shahrad, Rodrigo Fonseca, Inigo Goiri, Gohar Chaudhry, Paul Batum, Jason Cooke, Eduardo Laureano, Colby Tresness, Mark Russinovich, and Ricardo Bianchini. 2020. Serverless in the Wild: Characterizing and Optimizing the Serverless Workload at a Large Cloud Provider. In Proceedings of the 2020 USENIX Annual Technical Conference (USENIX ATC 20). USENIX Association, Virtual Event, 205--218. https:\/\/www.usenix.org\/conference\/atc20\/presentation\/shahrad"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/2038916.2038921"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"crossref","unstructured":"J.F. Shortle J.M. Thompson D. Gross and C.M. Harris. 2018. Fundamentals of Queueing Theory. John Wiley & Sons Ltd Hoboken NJ USA.","DOI":"10.1002\/9781119453765"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2018.00227"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1008202821328"},{"key":"e_1_3_2_1_76_1","unstructured":"Kubernetes Team. 2024. Kubernetes Horizontal Pod Autoscaler. https:\/\/kubernetes.io\/docs\/tasks\/run-application\/horizontal-pod-autoscale\/"},{"key":"e_1_3_2_1_77_1","unstructured":"Kubernetes Team. 2024. Resource Quotas. https:\/\/kubernetes.io\/docs\/concepts\/policy\/resource-quotas\/"},{"key":"e_1_3_2_1_78_1","unstructured":"Ray Team. 2024. Ray Serve Autoscaling. https:\/\/docs.ray.io\/en\/releases-2.0.0\/serve\/scaling-and-resource-allocation.html"},{"key":"e_1_3_2_1_79_1","volume-title":"Ray Serve: Scalable and Programmable Serving. https:\/\/docs.ray. io\/en\/releases-2.0.0\/serve","author":"Team Ray","year":"2024","unstructured":"Ray Team. 2024. Ray Serve: Scalable and Programmable Serving. https:\/\/docs.ray. io\/en\/releases-2.0.0\/serve"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aeue.2005.11.008"},{"key":"e_1_3_2_1_81_1","unstructured":"Twitter. 2018. Twitter stream traces. https:\/\/archive.org\/details\/archiveteam-twitter-stream-2018-04"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/2741948.2741964"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-019-0686-2"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1145\/2391229.2391243"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1109\/WISA.2013.64"},{"key":"e_1_3_2_1_86_1","volume-title":"Proceedings of the 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Wang Stephanie","year":"2021","unstructured":"Stephanie Wang, Eric Liang, Edward Oakes, Ben Hindman, Frank Sifei Luan, Audrey Cheng, and Ion Stoica. 2021. Ownership: A Distributed Futures System for Fine-Grained Tasks. In Proceedings of the 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21). USENIX Association, Virtual Event, 671--686. https:\/\/www.usenix.org\/conference\/nsdi21\/presentation\/cheng"},{"key":"e_1_3_2_1_87_1","volume-title":"Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). USENIX Association, Renton, WA, 945--960. https:\/\/www.usenix.org\/conference\/nsdi22\/presentation\/weng"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1002\/9780470455432.ch4"},{"key":"e_1_3_2_1_89_1","volume-title":"Gandiva: Introspective Cluster Scheduling for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Xiao Wencong","year":"2018","unstructured":"Wencong Xiao, Romil Bhardwaj, Ramachandran Ramjee, Muthian Sivathanu, Nipun Kwatra, Zhenhua Han, Pratyush Patel, Xuan Peng, Hanyu Zhao, Quanlu Zhang, Fan Yang, and Lidong Zhou. 2018. Gandiva: Introspective Cluster Scheduling for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, 595--610. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/xiao"},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539063"},{"key":"e_1_3_2_1_91_1","volume-title":"Proceedings of the 2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Zhang Chengliang","year":"2019","unstructured":"Chengliang Zhang, Minchen Yu, Wei Wang, and Feng Yan. 2019. MArk: Exploiting Cloud Services for Cost-Effective, SLO-Aware Machine Learning Inference Serving. In Proceedings of the 2019 USENIX Annual Technical Conference (USENIX ATC 19). USENIX Association, Renton, WA, 1049--1062. https:\/\/www.usenix.org\/conference\/atc19\/presentation\/zhang-chengliang"},{"volume-title":"Proceedings of the 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Zhang Haoyu","key":"e_1_3_2_1_92_1","unstructured":"Haoyu Zhang, Ganesh Ananthanarayanan, Peter Bodik, Matthai Philipose, Paramvir Bahl, and Michael J. Freedman. 2017. Live Video Analytics at Scale with Approximation and Delay-Tolerance. In Proceedings of the 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). USENIX Association, Boston, MA, 377--392. https:\/\/www.usenix.org\/conference\/nsdi17\/technical-sessions\/presentation\/zhang"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid54584.2022.00026"},{"key":"e_1_3_2_1_94_1","volume-title":"Shockwave: Fair and Efficient Cluster Scheduling for Dynamic Adaptation in Machine Learning. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Zheng Pengfei","year":"2023","unstructured":"Pengfei Zheng, Rui Pan, Tarannum Khan, Shivaram Venkataraman, and Aditya Akella. 2023. Shockwave: Fair and Efficient Cluster Scheduling for Dynamic Adaptation in Machine Learning. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). USENIX Association, Boston, MA, 703--723. https:\/\/www.usenix.org\/conference\/nsdi23\/presentation\/zheng"}],"event":{"name":"EuroSys '25: Twentieth European Conference on Computer Systems","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Rotterdam Netherlands","acronym":"EuroSys '25"},"container-title":["Proceedings of the Twentieth European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689031.3696071","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3689031.3696071","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:18:03Z","timestamp":1755775083000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689031.3696071"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":94,"alternative-id":["10.1145\/3689031.3696071","10.1145\/3689031"],"URL":"https:\/\/doi.org\/10.1145\/3689031.3696071","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}