{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T11:54:21Z","timestamp":1774958061285,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":79,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T00:00:00Z","timestamp":1691366400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"United States Air Force Research Laboratory","award":["FA8750-19-2-1000"],"award-info":[{"award-number":["FA8750-19-2-1000"]}]},{"name":"Assistant Secretary of Defense for Research and Engineering","award":["FA8702-15-D-0001"],"award-info":[{"award-number":["FA8702-15-D-0001"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,7]]},"DOI":"10.1145\/3588195.3592997","type":"proceedings-article","created":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T20:47:00Z","timestamp":1691441220000},"page":"3-16","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Kairos: Building Cost-Efficient Machine Learning Inference Systems with Heterogeneous Cloud Resources"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9778-1023","authenticated-orcid":false,"given":"Baolin","family":"Li","sequence":"first","affiliation":[{"name":"Northeastern University, Boston, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4937-6054","authenticated-orcid":false,"given":"Siddharth","family":"Samsi","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory, Lexington, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4598-2808","authenticated-orcid":false,"given":"Vijay","family":"Gadepally","sequence":"additional","affiliation":[{"name":"MIT Lincoln Laboratory, Lexington, 
MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7253-2458","authenticated-orcid":false,"given":"Devesh","family":"Tiwari","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, MA, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,8,7]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1049","volume-title":"Mark: Exploiting cloud services for cost-effective, slo-aware machine learning inference serving. In 2019 {USENIX} Annual Technical Conference ({USENIX} {ATC} 19)","author":"Zhang Chengliang","year":"2019","unstructured":"Chengliang Zhang, Minchen Yu, Wei Wang, and Feng Yan. Mark: Exploiting cloud services for cost-effective, slo-aware machine learning inference serving. In 2019 {USENIX} Annual Technical Conference ({USENIX} {ATC} 19), pages 1049--1062, 2019."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_1_4_1","first-page":"613","volume-title":"14th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 17)","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Giulio Zhou, Michael J Franklin, Joseph E Gonzalez, and Ion Stoica. Clipper: A low-latency online prediction serving system. 
In 14th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 17), pages 613--627, 2017."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421285"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330384"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00049"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00073"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486987"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499368.2451125"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901355"},{"key":"e_1_3_2_1_12_1","first-page":"629","volume-title":"International Conference on Machine Learning","author":"Banerjee Subho","year":"2020","unstructured":"Subho Banerjee, Saurabh Jha, Zbigniew Kalbarczyk, and Ravishankar Iyer. Inductive-bias-driven reinforcement learning for efficient schedules in heterogeneous clusters. In International Conference on Machine Learning, pages 629--641. PMLR, 2020."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS.2018.00028"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486993"},{"key":"e_1_3_2_1_16_1","first-page":"1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Li Baolin","year":"2021","unstructured":"Baolin Li, Rohan Basu Roy, Tirthak Patel, Vijay Gadepally, Karen Gettings, and Devesh Tiwari. Ribbon: cost-effective and qos-aware deep learning model inference using a diverse pool of cloud computing instances. 
In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pages 1--13, 2021."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"e_1_3_2_1_18_1","first-page":"443","volume-title":"14th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 20)","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. Serving dnns like clockwork: Performance predictability from the bottom up. In 14th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 20), pages 443--462, 2020."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00025"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155650"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2644865.2541941"},{"key":"e_1_3_2_1_22_1","first-page":"499","volume-title":"Hawk: Hybrid datacenter scheduling. In 2015 {USENIX} Annual Technical Conference ({USENIX} {ATC} 15)","author":"Delgado Pamela","year":"2015","unstructured":"Pamela Delgado, Florin Dinu, Anne-Marie Kermarrec, and Willy Zwaenepoel. Hawk: Hybrid datacenter scheduling. In 2015 {USENIX} Annual Technical Conference ({USENIX} {ATC} 15), pages 499--510, 2015."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749475"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.13"},{"key":"e_1_3_2_1_25_1","first-page":"481","volume-title":"14th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 20)","author":"Narayanan Deepak","year":"2020","unstructured":"Deepak Narayanan, Keshav Santhanam, Fiodar Kazhamiaka, Amar Phanishayee, and Matei Zaharia. Heterogeneity-aware cluster scheduling policies for deep learning workloads. 
In 14th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 20), pages 481--498, 2020."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2012.6237019"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2398438"},{"key":"e_1_3_2_1_28_1","first-page":"1","article-title":"Shopping for a cloud made easy","volume":"10","author":"Li Ang","year":"2010","unstructured":"Ang Li, Xiaowei Yang, Ming Zhang, and S Kandula. Cloudcmp: Shopping for a cloud made easy. HotCloud, 10:1--7, 2010.","journal-title":"HotCloud"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3185768.3186286"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2987550.2987556"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3127479.3131614"},{"key":"e_1_3_2_1_32_1","first-page":"329","volume-title":"10th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 13)","author":"Xu Yunjing","year":"2013","unstructured":"Yunjing Xu, Zachary Musgrave, Brian Noble, and Michael Bailey. Bobtail: Avoiding long tails in the cloud. In 10th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 13), pages 329--341, 2013."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/UCC.2018.00019"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3431920.3439304"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446693"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563510"},{"key":"e_1_3_2_1_37_1","first-page":"937","volume-title":"14th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 20)","author":"Mai Luo","year":"2020","unstructured":"Luo Mai, Guo Li, Marcel Wagenl\u00e4nder, Konstantinos Fertakis, Andrei-Octavian Brabete, and Peter Pietzuch. Kungfu: Making training in distributed machine learning adaptive. 
In 14th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 20), pages 937--954, 2020."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3127479.3129262"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00090"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS48715.2020.00-20"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS.2019.00033"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303958"},{"key":"e_1_3_2_1_43_1","first-page":"353","volume-title":"Accurate learning for energy and timeliness. In 2020 {USENIX} Annual Technical Conference ({USENIX} {ATC} 20)","author":"Wan Chengcheng","year":"2020","unstructured":"Chengcheng Wan, Muhammad Santriaji, Eri Rogers, Henry Hoffmann, Michael Maire, and Shan Lu. {ALERT}: Accurate learning for energy and timeliness. In 2020 {USENIX} Annual Technical Conference ({USENIX} {ATC} 20), pages 353--369, 2020."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476143"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486972"},{"key":"e_1_3_2_1_46_1","first-page":"397","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J Yadwadkar, and Christos Kozyrakis. {INFaaS}: Automated model-less inference serving. 
In 2021 USENIX Annual Technical Conference (USENIX ATC 21), pages 397--411, 2021."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-015-7744-1_2"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2021.102815"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/71.993206"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/100216.100262"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02278710"},{"key":"e_1_3_2_1_52_1","volume-title":"The hungarian method for the assignment problem. Naval research logistics quarterly, 2(1--2):83--97","author":"Kuhn Harold W","year":"1955","unstructured":"Harold W Kuhn. The hungarian method for the assignment problem. Naval research logistics quarterly, 2(1--2):83--97, 1955."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAES.2016.140952"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00020"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00296-0_5"},{"key":"e_1_3_2_1_56_1","volume-title":"Approximation algorithms for scheduling unrelated parallel machines. Mathematical programming, 46: 259--271","author":"Lenstra Jan Karel","year":"1990","unstructured":"Jan Karel Lenstra, David B Shmoys, and \u00c9va Tardos. Approximation algorithms for scheduling unrelated parallel machines. Mathematical programming, 46: 259--271, 1990."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/1060590.1060639"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/REAL.1996.563715"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/800175.809851"},{"key":"e_1_3_2_1_60_1","volume-title":"A survey of techniques for approximate computing. ACM Computing Surveys (CSUR), 48(4):1--33","author":"Mittal Sparsh","year":"2016","unstructured":"Sparsh Mittal. 
A survey of techniques for approximate computing. ACM Computing Surveys (CSUR), 48(4):1--33, 2016."},{"key":"e_1_3_2_1_61_1","first-page":"012015","volume-title":"Journal of Physics: Conference Series","volume":"1361","author":"Nainggolan Rena","unstructured":"Rena Nainggolan, Resianta Perangin-angin, Emma Simarmata, and Astuti Feriani Tarigan. Improved the performance of the k-means cluster using the sum of squared error (sse) optimized by using the elbow method. In Journal of Physics: Conference Series, volume 1361, page 012015. IOP Publishing, 2019."},{"key":"e_1_3_2_1_62_1","unstructured":"Nvidia triton inference server. URL https:\/\/docs.nvidia.com\/deeplearning\/triton-inference-server\/."},{"key":"e_1_3_2_1_63_1","unstructured":"grpc. URL https:\/\/grpc.io\/."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477132.3483588"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433758"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00025"},{"key":"e_1_3_2_1_67_1","unstructured":"Maxim Naumov, Dheevatsa Mudigere, Hao-Jun Michael Shi, Jianyu Huang, Narayanan Sundaraman, Jongsoo Park, Xiaodong Wang, Udit Gupta, Carole-Jean Wu, Alisson G Azzolini, et al. Deep learning recommendation model for personalization and recommendation systems. 
arXiv preprint arXiv:1906.00091 2019."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366424.3382692"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC50251.2020.00024"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3038912.3052569"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3346997"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304013"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581261"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/2851141.2851151"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.52842\/conf.caadria.2016.177"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/IRI.2017.47"}],"event":{"name":"HPDC '23: The 32nd International Symposium on High-Performance Parallel and Distributed Computing","location":"Orlando FL USA","acronym":"HPDC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed 
Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3588195.3592997","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3588195.3592997","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:25Z","timestamp":1750178845000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3588195.3592997"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,7]]},"references-count":79,"alternative-id":["10.1145\/3588195.3592997","10.1145\/3588195"],"URL":"https:\/\/doi.org\/10.1145\/3588195.3592997","relation":{},"subject":[],"published":{"date-parts":[[2023,8,7]]},"assertion":[{"value":"2023-08-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}