{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T07:51:30Z","timestamp":1780473090432,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2213636, 2105494, 2211302, 2211888, 2325956, 23091241,19250001"],"award-info":[{"award-number":["2213636, 2105494, 2211302, 2211888, 2325956, 23091241,19250001"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100008428","name":"Department of Energy and Climate Change","doi-asserted-by":"publisher","award":["DE-EE0010143"],"award-info":[{"award-number":["DE-EE0010143"]}],"id":[{"id":"10.13039\/100008428","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006754","name":"Army Research Laboratory","doi-asserted-by":"publisher","award":["W911NF-17-2-0196"],"award-info":[{"award-number":["W911NF-17-2-0196"]}],"id":[{"id":"10.13039\/100006754","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,19]]},"DOI":"10.1145\/3772052.3772243","type":"proceedings-article","created":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:19:00Z","timestamp":1768321140000},"page":"416-429","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["FailLite: Failure-Resilient Model Serving for Resource-Constrained Edge Environments"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7115-1517","authenticated-orcid":false,"given":"Li","family":"Wu","sequence":"first","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, Massachusetts, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5765-8194","authenticated-orcid":false,"given":"Walid","family":"Hanafy","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3883-7220","authenticated-orcid":false,"given":"Tarek","family":"Abdelzaher","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1722-4927","authenticated-orcid":false,"given":"David","family":"Irwin","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4937-8912","authenticated-orcid":false,"given":"Jesse","family":"Milzman","sequence":"additional","affiliation":[{"name":"Army Research Laboratory, New York, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5435-1901","authenticated-orcid":false,"given":"Prashant","family":"Shenoy","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624849"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3625549.3658688"},{"key":"e_1_3_2_1_3_1","unstructured":"Amazon Web Services. 2025. Amazon SageMaker. https:\/\/aws.amazon.com\/sagemaker\/ Accessed: 2025-04-09."},{"key":"e_1_3_2_1_4_1","volume-title":"Neuralpower: Predict and Deploy Energy-efficient Convolutional Neural Networks. In Asian Conference on Machine Learning.","author":"Cai Ermao","year":"2017","unstructured":"Ermao Cai, Da-Cheng Juan, Dimitrios Stamoulis, and Diana Marculescu. 2017. Neuralpower: Predict and Deploy Energy-efficient Convolutional Neural Networks. In Asian Conference on Machine Learning."},{"key":"e_1_3_2_1_5_1","volume-title":"Clipper: A Low-Latency Online Prediction Serving System. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J. Franklin, Joseph E. Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency Online Prediction Serving System. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). USENIX Association, Boston, MA, 613\u2013627. https:\/\/www.usenix.org\/conference\/nsdi17\/technical-sessions\/presentation\/crankshaw"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2019.1911564"},{"key":"e_1_3_2_1_7_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like Clockwork: Performance Predictability from the Bottom Up. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, 443\u2013462. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/gujarati"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS.2018.00052"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00012"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MILCOM58377.2023.10356302"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447555.3465326"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of IEEE\/ACM 31st International Symposium on Quality of Service (IWQoS'23)","author":"Hanafy Walid A.","year":"2023","unstructured":"Walid A. Hanafy, Limin Wang, Hyunseok Chang, Sarit Mukherjee, T. V. Lakshman, and Prashant Shenoy. 2023. Understanding the Benefits of Hardware-Accelerated Communication in Model-Serving Applications. In Proceedings of IEEE\/ACM 31st International Symposium on Quality of Service (IWQoS'23)."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the Conference on Design, Automation & Test in Europe","author":"Hashemi Soheil","year":"2017","unstructured":"Soheil Hashemi, Nicholas Anthony, Hokchhay Tann, R. Iris Bahar, and Sherief Reda. 2017. Understanding the Impact of Precision Quantization on the Accuracy and Energy of Neural Networks. In Proceedings of the Conference on Design, Automation & Test in Europe (Lausanne, Switzerland) (DATE '17). European Design and Automation Association, Leuven, BEL, 1478\u20131483."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589105"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419215"},{"key":"e_1_3_2_1_17_1","volume-title":"Predicting the Computational Cost of Deep Learning Models. In 2018 IEEE International Conference on Big Data (Big Data).","author":"Justus Daniel","year":"2018","unstructured":"Daniel Justus, John Brennan, Stephen Bonner, and Andrew Stephen McGough. 2018. Predicting the Computational Cost of Deep Learning Models. In 2018 IEEE International Conference on Big Data (Big Data)."},{"key":"e_1_3_2_1_18_1","volume-title":"Fault-Tolerant Systems","author":"Koren Israel","unstructured":"Israel Koren and C. Mani Krishna. 2021. Fault-Tolerant Systems (second edition ed.). Morgan Kaufmann."},{"key":"e_1_3_2_1_19_1","volume-title":"Kubeflow: The Machine Learning Toolkit for Kubernetes. https:\/\/www.kubeflow.org\/ Accessed: 2025-04-14.","year":"2025","unstructured":"Kubeflow. 2025. Kubeflow: The Machine Learning Toolkit for Kubernetes. https:\/\/www.kubeflow.org\/ Accessed: 2025-04-14."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607034"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2946140"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126964"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCN61486.2024.10637580"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582080"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576842.3582375"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC50251.2020.00023"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155389"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5924"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/EDGE55608.2022.00029"},{"key":"e_1_3_2_1_30_1","volume-title":"Defcon: Preventing Overload with Graceful Feature Degradation. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Meza Justin J.","year":"2023","unstructured":"Justin J. Meza, Thote Gowda, Ahmed Eid, Tomiwa Ijaware, Dmitry Chernyshev, Yi Yu, Md Nazim Uddin, Rohan Das, Chad Nachiappan, Sari Tran, Shuyang Shi, Tina Luo, David Ke Hong, Sankaralingam Panneerselvam, Hans Ragas, Svetlin Manavski, Weidong Wang, and Francois Richard. 2023. Defcon: Preventing Overload with Graceful Feature Degradation. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23). USENIX Association, Boston, MA, 607\u2013622. https:\/\/www.usenix.org\/conference\/osdi23\/presentation\/meza"},{"key":"e_1_3_2_1_31_1","unstructured":"NVIDIA. 2024. Triton Inference Server. https:\/\/developer.nvidia.com\/triton-inference- server Accessed: 2025-04-13."},{"key":"e_1_3_2_1_32_1","unstructured":"PyTorch. 2024. TorchVision Models and pre-trained weights. https:\/\/pytorch.org\/vision\/stable\/models.html Accessed: 2025-04-14."},{"key":"e_1_3_2_1_33_1","volume-title":"Paleo: A Performance Model for Deep Neural Networks. In The International Conference on Learning Representations (ICLR '17)","author":"Sparks Evan R.","unstructured":"Qi, Evan R. Sparks, and Ameet S. Talwalkar. 2017. Paleo: A Performance Model for Deep Neural Networks. In The International Conference on Learning Representations (ICLR '17)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Joseph Redmon Santosh Divvala Ross Girshick and Ali Farhadi. 2016. You Only Look Once: Unified Real-Time Object Detection. arXiv:1506.02640 [cs.CV] https:\/\/arxiv.org\/abs\/1506.02640","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_35_1","unstructured":"Dillon Reis Jordan Kupec Jacqueline Hong and Ahmad Daoudi. 2024. Real-Time Flying Object Detection with YOLOv8. arXiv:2305.09972 [cs.CV] https:\/\/arxiv.org\/abs\/2305.09972"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366626.3368131"},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the 2nd International Workshop on Challenges in Artificial Intelligence and Machine Learning for Internet of Things","author":"Samplawski Colin","unstructured":"Colin Samplawski, Jin Huang, Deepak Ganesan, and Benjamin M. Marlin. 2020. Towards Objection Detection Under IoT Resource Constraints: Combining Partitioning, Slicing and Compression. In Proceedings of the 2nd International Workshop on Challenges in Artificial Intelligence and Machine Learning for Internet of Things (Virtual Event, Japan) (AIChallengeIoT '20). Association for Computing Machinery, New York, NY, USA, 14\u201320."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2017.9"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/MPRV.2009.82"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3446382.3448360"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359658"},{"key":"e_1_3_2_1_42_1","volume-title":"2019 USENIX Conference on Operational Machine Learning (OpML 19)","author":"Soifer Jonathan","year":"2019","unstructured":"Jonathan Soifer, Jason Li, Mingqin Li, Jeffrey Zhu, Yingnan Li, Yuxiong He, Elton Zheng, Adi Oltean, Maya Mosyak, Chris Barnes, Thomas Liu, and Junhua Wang. 2019. Deep Learning Inference Service at Microsoft. In 2019 USENIX Conference on Operational Machine Learning (OpML 19). Santa Clara, CA, 15\u201317."},{"key":"e_1_3_2_1_43_1","volume-title":"Le","author":"Tan Mingxing","year":"2020","unstructured":"Mingxing Tan and Quoc V. Le. 2020. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. arXiv:1905.11946 [cs.LG] https:\/\/arxiv.org\/abs\/1905.11946"},{"key":"e_1_3_2_1_44_1","volume-title":"ALERT: Accurate Learning for Energy and Timeliness. In 2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Wan Chengcheng","year":"2020","unstructured":"Chengcheng Wan, Muhammad Santriaji, Eri Rogers, Henry Hoffmann, Michael Maire, and Shan Lu. 2020. ALERT: Accurate Learning for Energy and Timeliness. In 2020 USENIX Annual Technical Conference (USENIX ATC 20). USENIX Association, 353\u2013369."},{"key":"e_1_3_2_1_45_1","volume-title":"2024 USENIX Annual Technical Conference (USENIX ATC 24)","author":"Xiong Yifan","year":"2024","unstructured":"Yifan Xiong, Yuting Jiang, Ziyue Yang, Lei Qu, Guoshuai Zhao, Shuguang Liu, Dong Zhong, Boris Pinzur, Jie Zhang, Yang Wang, et al. 2024. {SuperBench}: Improving Cloud {AI} Infrastructure Reliability with Proactive Validation. In 2024 USENIX Annual Technical Conference (USENIX ATC 24). 835\u2013850."},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of the first international workshop on challenges in artificial intelligence and machine learning for internet of things. 25\u201331","author":"Yousefpour Ashkan","year":"2019","unstructured":"Ashkan Yousefpour, Siddartha Devic, Brian Q Nguyen, Aboudy Kreidieh, Alan Liao, Alexandre M Bayen, and Jason P Jue. 2019. Guardians of the deep fog: Failure-resilient DNN inference from edge to cloud. In Proceedings of the first international workshop on challenges in artificial intelligence and machine learning for internet of things. 25\u201331."},{"key":"e_1_3_2_1_47_1","volume-title":"Resilinet: Failure-resilient inference in distributed neural networks. arXiv preprint arXiv:2002.07386","author":"Yousefpour Ashkan","year":"2020","unstructured":"Ashkan Yousefpour, Brian Q Nguyen, Siddartha Devic, Guanhua Wang, Aboudy Kreidieh, Hans Lobel, Alexandre M Bayen, and Jason P Jue. 2020. Resilinet: Failure-resilient inference in distributed neural networks. arXiv preprint arXiv:2002.07386 (2020)."},{"key":"e_1_3_2_1_48_1","unstructured":"Jiahui Yu Linjie Yang Ning Xu Jianchao Yang and Thomas Huang. 2018. Slimmable Neural Networks. arXiv:1812.08928 [cs.CV]"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCC.2020.3006751"},{"key":"e_1_3_2_1_51_1","volume-title":"Model-Switching: Dealing with Fluctuating Workloads in Machine-Learning-asa-Service Systems. In 12th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 20)","author":"Zhang Jeff","year":"2020","unstructured":"Jeff Zhang, Sameh Elnikety, Shuayb Zarar, Atul Gupta, and Siddharth Garg. 2020. Model-Switching: Dealing with Fluctuating Workloads in Machine-Learning-asa-Service Systems. In 12th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 20). USENIX Association. https:\/\/www.usenix.org\/conference\/hotcloud20\/presentation\/zhang"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3058532"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737478"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576914.3587511"}],"event":{"name":"SoCC '25: ACM Symposium on Cloud Computing","location":"Online USA","acronym":"SoCC '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2025 ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772052.3772243","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:26:11Z","timestamp":1768321571000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772052.3772243"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":54,"alternative-id":["10.1145\/3772052.3772243","10.1145\/3772052"],"URL":"https:\/\/doi.org\/10.1145\/3772052.3772243","relation":{},"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"2026-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}