{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T19:10:01Z","timestamp":1755976201756,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,11]],"date-time":"2023-12-11T00:00:00Z","timestamp":1702252800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,11]]},"DOI":"10.1145\/3631295.3631401","type":"proceedings-article","created":{"date-parts":[[2023,11,24]],"date-time":"2023-11-24T12:05:18Z","timestamp":1700827518000},"page":"48-52","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On Serving Image Classification Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4398-0243","authenticated-orcid":false,"given":"Aurora","family":"Gonz\u00e1lez-Vidal","sequence":"first","affiliation":[{"name":"University of Murcia, Murcia, Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0153-7251","authenticated-orcid":false,"given":"Alexander","family":"Isenko","sequence":"additional","affiliation":[{"name":"Technical University of Munich, Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5382-276X","authenticated-orcid":false,"given":"K. R.","family":"Jayaram","sequence":"additional","affiliation":[{"name":"IBM Research, Yorktown Heights, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,12,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.14778\/3547305.3547313"},{"key":"e_1_3_2_1_2_1","volume-title":"2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Choi Seungbeom","year":"2022","unstructured":"Seungbeom Choi, Sunho Lee, Yeonjae Kim, Jongse Park, Youngjin Kwon, and Jaehyuk Huh. 2022. Serving heterogeneous machine learning models on Multi-GPU servers with Spatio-Temporal sharing. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). 199--216."},{"key":"e_1_3_2_1_3_1","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J Franklin, Joseph E Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency online prediction serving system. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). 613--627."},{"key":"e_1_3_2_1_4_1","volume-title":"Selection-inference: Exploiting large language models for interpretable logical reasoning. arXiv preprint arXiv:2205.09712","author":"Creswell Antonia","year":"2022","unstructured":"Antonia Creswell, Murray Shanahan, and Irina Higgins. 2022. Selection-inference: Exploiting large language models for interpretable logical reasoning. arXiv preprint arXiv:2205.09712 (2022)."},{"key":"e_1_3_2_1_5_1","volume-title":"PriMed: Private federated training and encrypted inference on medical images in healthcare. Expert Systems","author":"Gopalakrishnan Aparna","year":"2022","unstructured":"Aparna Gopalakrishnan, Narayan P Kulkarni, Chethan B Raghavendra, Raghavendra Manjappa, Prasad Honnavalli, and Sivaraman Eswaran. 2022. PriMed: Private federated training and encrypted inference on medical images in healthcare. Expert Systems (2022), e13283."},{"key":"e_1_3_2_1_6_1","volume-title":"large minibatch sgd: Training imagenet in 1 hour. arXiv preprint arXiv:1706.02677","author":"Goyal Priya","year":"2017","unstructured":"Priya Goyal, Piotr Doll\u00e1r, Ross Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, and Kaiming He. 2017. Accurate, large minibatch sgd: Training imagenet in 1 hour. arXiv preprint arXiv:1706.02677 (2017)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3135974.3135993"},{"key":"e_1_3_2_1_8_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like clockwork: Performance predictability from the bottom up. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). 443--462."},{"key":"e_1_3_2_1_9_1","unstructured":"Alexander Isenko. 2023. Basic Hardware Monitor. https:\/\/github.com\/cirquit\/pyhardware-monitor"},{"volume-title":"Advances in Computer","author":"Kochura Yuriy","key":"e_1_3_2_1_10_1","unstructured":"Yuriy Kochura, Yuri Gordienko, Vlad Taran, Nikita Gordienko, Alexandr Rokovyi, Oleg Alienin, and Sergii Stirenko. 2020. Batch size influence on performance of graphic and tensor processing units during training and inference phases. In Advances in Computer Science for Engineering and Education II. Springer, 658--668."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2019.00022"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE55515.2023.00082"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jafr.2022.100308"},{"key":"e_1_3_2_1_14_1","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J Yadwadkar, and Christos Kozyrakis. 2021. INFaaS: Automated model-less inference serving. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). 397--411."},{"key":"e_1_3_2_1_15_1","volume-title":"International conference on machine learning. PMLR, 6105--6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105--6114."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/SMARTCOMP52413.2021.00021"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486987"},{"key":"e_1_3_2_1_18_1","volume-title":"SLO-Aware Machine Learning Inference Serving. In 2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Zhang Chengliang","year":"2019","unstructured":"Chengliang Zhang, Minchen Yu, Wei Wang, and Feng Yan. 2019. MArk: Exploiting Cloud Services for Cost-Effective, SLO-Aware Machine Learning Inference Serving. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). 1049--1062."},{"key":"e_1_3_2_1_19_1","volume-title":"SHEPHERD: Serving DNNs in the Wild. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Zhang Hong","year":"2023","unstructured":"Hong Zhang, Yupeng Tang, Anurag Khandelwal, and Ion Stoica. 2023. SHEPHERD: Serving DNNs in the Wild. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). 787--808."}],"event":{"name":"Middleware '23: 24th International Middleware Conference","sponsor":["ACM Association for Computing Machinery","IFIP International Federation for Information Processing"],"location":"Bologna Italy","acronym":"Middleware '23"},"container-title":["Proceedings of the 9th International Workshop on Serverless Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3631295.3631401","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3631295.3631401","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:40:44Z","timestamp":1755974444000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3631295.3631401"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,11]]},"references-count":19,"alternative-id":["10.1145\/3631295.3631401","10.1145\/3631295"],"URL":"https:\/\/doi.org\/10.1145\/3631295.3631401","relation":{},"subject":[],"published":{"date-parts":[[2023,12,11]]},"assertion":[{"value":"2023-12-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}