{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T01:16:25Z","timestamp":1772846185132,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,5,9]],"date-time":"2023-05-09T00:00:00Z","timestamp":1683590400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100004344","name":"Adobe Systems","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004344","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2211302, 2211888, 2213636, 2105494"],"award-info":[{"award-number":["2211302, 2211888, 2213636, 2105494"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"US Army Contract","award":["W911NF-17-2-0196"],"award-info":[{"award-number":["W911NF-17-2-0196"]}]},{"DOI":"10.13039\/100008536","name":"Amazon Web Services","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100008536","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,5,9]]},"DOI":"10.1145\/3576842.3582375","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T22:58:08Z","timestamp":1682549888000},"page":"209-221","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["D\u011blen: Enabling Flexible and Adaptive Model-serving for Multi-tenant Edge AI"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4702-5689","authenticated-orcid":false,"given":"Qianlin","family":"Liang","sequence":"first","affiliation":[{"name":"Manning College of Information &amp; Computer Sciences, University of Massachusetts Amherst, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5765-8194","authenticated-orcid":false,"given":"Walid A.","family":"Hanafy","sequence":"additional","affiliation":[{"name":"Manning College of Information &amp; Computer Sciences, University of Massachusetts Amherst, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9304-910X","authenticated-orcid":false,"given":"Noman","family":"Bashir","sequence":"additional","affiliation":[{"name":"Manning College of Information &amp; Computer Sciences, University of Massachusetts Amherst, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2774-9284","authenticated-orcid":false,"given":"Ahmed","family":"Ali-Eldin","sequence":"additional","affiliation":[{"name":"Manning College of Information &amp; Computer Sciences, University of Massachusetts Amherst, USA and Department of Computer Science and Engineering, Chalmers University of Technology, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1722-4927","authenticated-orcid":false,"given":"David","family":"Irwin","sequence":"additional","affiliation":[{"name":"Manning College of Information &amp; Computer Sciences, University of Massachusetts Amherst, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5435-1901","authenticated-orcid":false,"given":"Prashant","family":"Shenoy","sequence":"additional","affiliation":[{"name":"Manning College of Information &amp; Computer Sciences, University of Massachusetts Amherst, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,5,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg\u00a0S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org."},{"key":"e_1_3_2_1_2_1","volume-title":"Abdel rahman Mohamed, and Michael Auli","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, H. Zhou, Abdel rahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations. ArXiv abs\/2006.11477 (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2015.10"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning -","volume":"70","author":"Bolukbasi Tolga","year":"2017","unstructured":"Tolga Bolukbasi, Joseph Wang, Ofer Dekel, and Venkatesh Saligrama. 2017. Adaptive Neural Networks for Efficient Inference. In Proceedings of the 34th International Conference on Machine Learning - Volume 70 (Sydney, NSW, Australia) (ICML\u201917). JMLR.org, 527\u2013536."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307334.3326071"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSPEC.2019.8701189"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421285"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_10_1","unstructured":"Open Neural\u00a0Network Exchange. 2021. ONNX model zoo. https:\/\/github.com\/onnx\/models"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3241539.3241559"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2018.8445101"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2002.1022259"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/319151.319155"},{"key":"e_1_3_2_1_15_1","volume-title":"18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Guo Peizhen","year":"2021","unstructured":"Peizhen Guo, Bo Hu, and Wenjun Hu. 2021. Mistify: Automating DNN Model Porting for On-Device Inference at the Edge. In 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21). USENIX Association, 705\u2013719. https:\/\/www.usenix.org\/conference\/nsdi21\/presentation\/guo"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"M. Halpern B. Boroujerdian T. Mummert E. Duesterwald and V. Reddi. 2019. One Size Does Not Fit All: Quantifying and Exposing the Accuracy-latency Trade-off in Machine Learning Cloud Service APIs via Tolerance Tiers. In ISPASS.","DOI":"10.1109\/ISPASS.2019.00012"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447555.3465326"},{"key":"e_1_3_2_1_18_1","volume-title":"Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, X. Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 770\u2013778."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450268.3453521"},{"key":"e_1_3_2_1_20_1","volume-title":"Multi-Scale Dense Convolutional Networks for Efficient Prediction. ArXiv abs\/1703.09844","author":"Huang Gao","year":"2017","unstructured":"Gao Huang, Danlu Chen, T. Li, Felix Wu, L.\u00a0V.\u00a0D. Maaten, and Kilian\u00a0Q. Weinberger. 2017. Multi-Scale Dense Convolutional Networks for Efficient Prediction. ArXiv abs\/1703.09844 (2017)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3081333.3081360"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366636"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2857338"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1057\/palgrave.jors.2600523"},{"key":"e_1_3_2_1_25_1","volume-title":"Kingma and Jimmy Ba","author":"P.","year":"2015","unstructured":"Diederik\u00a0P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. CoRR abs\/1412.6980 (2015)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3400302.3415698"},{"key":"e_1_3_2_1_27_1","volume-title":"PETZEL: Opening the Black Box of Machine Learning Prediction Serving Systems. In OSDI.","author":"Lee Y.","year":"2018","unstructured":"Y. Lee, A. Scolari, B. Chun, M. Santambrogio, M. Weimer, and M. Interlandi. 2018. PETZEL: Opening the Black Box of Machine Learning Prediction Serving Systems. In OSDI."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2946140"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:OPTE.0000048538.35456.45"},{"key":"e_1_3_2_1_30_1","volume-title":"Proc. Chi, Vol.\u00a02007","author":"Mellis David","year":"2007","unstructured":"David Mellis, Massimo Banzi, David Cuartielles, and Tom Igoe. 2007. Arduino: An open electronic prototyping platform. In Proc. Chi, Vol.\u00a02007. 1\u201311."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPSN.2018.00051"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3384419.3430782"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Dushyanth Narayanan and Mahadev Satyanarayanan. 2003. Predictive Resource Management for Wearable Computing. In MobiSys \u201903.","DOI":"10.1145\/1066116.1189041"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/268998.266708"},{"key":"e_1_3_2_1_35_1","volume-title":"Retrieved","year":"2020","unstructured":"Nvidia. 2020. NVIDIA Jetson Modules. Retrieved October 19, 2020 from https:\/\/developer.nvidia.com\/embedded\/jetson-modules"},{"key":"e_1_3_2_1_36_1","volume-title":"Automation Test in Europe Conference Exhibition (DATE). 475\u2013480","author":"Panda Priyadarshini","year":"2016","unstructured":"Priyadarshini Panda, Abhronil Sengupta, and Kaushik Roy. 2016. Conditional Deep Learning for energy-efficient and enhanced pattern recognition. In 2016 Design, Automation Test in Europe Conference Exhibition (DATE). 475\u2013480."},{"key":"e_1_3_2_1_37_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32, H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d'Alch\u00e9-Buc, E.\u00a0Fox, and R.\u00a0Garnett (Eds.). Curran Associates, Inc., 8024\u20138035."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_39_1","volume-title":"INFaaS: Automated Model-less Inference Serving. In 2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja\u00a0J. Yadwadkar, and Christos Kozyrakis. 2021. INFaaS: Automated Model-less Inference Serving. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). USENIX Association, 397\u2013411. https:\/\/www.usenix.org\/conference\/atc21\/presentation\/romero"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2019.2911878"},{"key":"e_1_3_2_1_41_1","volume-title":"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. ArXiv abs\/1905.11946","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc\u00a0V. Le. 2019. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. ArXiv abs\/1905.11946 (2019)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3412382.3458272"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3211332.3211336"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/RoboSoft48309.2020.9116060"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.1986.1092946"},{"key":"e_1_3_2_1_47_1","volume-title":"ALERT: Accurate Learning for Energy and Timeliness. In 2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Wan Chengcheng","year":"2020","unstructured":"Chengcheng Wan, Muhammad Santriaji, Eri Rogers, Henry Hoffmann, Michael Maire, and Shan Lu. 2020. ALERT: Accurate Learning for Energy and Timeliness. In 2020 USENIX Annual Technical Conference (USENIX ATC 20). USENIX Association, 353\u2013369."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318216.3363308"},{"key":"e_1_3_2_1_49_1","volume-title":"Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition. ArXiv abs\/1804.03209","author":"Warden Pete","year":"2018","unstructured":"Pete Warden. 2018. Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition. ArXiv abs\/1804.03209 (2018)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386901.3388917"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411029.3411035"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386901.3388948"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419185"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3380881"},{"key":"e_1_3_2_1_55_1","unstructured":"C. Zhang M. Yu W. Wang and F. Yan. 2019. Exploiting Cloud Services for Cost-Effective SLO-Aware Machine Learning Inference. In USENIX ATC."},{"key":"e_1_3_2_1_56_1","unstructured":"J. Zhang S. Elnikety S. Zarar A. Gupta and S. Garg. 2020. Model-Switching: Dealing with Fluctuating Workloads in Machine-Learning-as-a-Service Systems. In HotCloud."}],"event":{"name":"IoTDI '23: International Conference on Internet-of-Things Design and Implementation","location":"San Antonio TX USA","acronym":"IoTDI '23","sponsor":["SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 8th ACM\/IEEE Conference on Internet of Things Design and Implementation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3576842.3582375","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3576842.3582375","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3576842.3582375","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:08:58Z","timestamp":1750183738000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3576842.3582375"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,9]]},"references-count":56,"alternative-id":["10.1145\/3576842.3582375","10.1145\/3576842"],"URL":"https:\/\/doi.org\/10.1145\/3576842.3582375","relation":{},"subject":[],"published":{"date-parts":[[2023,5,9]]},"assertion":[{"value":"2023-05-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}