{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:50:24Z","timestamp":1767340224224,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T00:00:00Z","timestamp":1667779200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2008368"],"award-info":[{"award-number":["CNS-2008368"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,11,7]]},"DOI":"10.1145\/3528535.3565254","type":"proceedings-article","created":{"date-parts":[[2022,12,20]],"date-time":"2022-12-20T13:40:01Z","timestamp":1671543601000},"page":"322-334","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["MicroEdge"],"prefix":"10.1145","author":[{"given":"Difei","family":"Cao","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology"}]},{"given":"Jinsun","family":"Yoo","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology"}]},{"given":"Zhuangdi","family":"Xu","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology"}]},{"given":"Enrique","family":"Saurez","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology"}]},{"given":"Harshit","family":"Gupta","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology"}]},{"given":"Tushar","family":"Krishna","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology"}]},{"given":"Umakishore","family":"Ramachandran","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology"}]}],"member":"320","published-online":{"date-parts":[[2022,11,8]]},"reference":[{"volume-title":"What Is Amazon SageMaker? Retrieved","year":"2022","key":"e_1_3_2_1_1_1","unstructured":"Amazon. 2022. What Is Amazon SageMaker? Retrieved October 3, 2022 from https:\/\/docs.aws.amazon.com\/sagemaker\/latest\/dg\/whatis.html"},{"key":"e_1_3_2_1_2_1","volume-title":"K3s: Lightweight Kubernetes. Retrieved","author":"Authors Project","year":"2022","unstructured":"K3s Project Authors. 2022. K3s: Lightweight Kubernetes. Retrieved October 3, 2022 from https:\/\/k3s.io\/"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3390\/s19092048"},{"key":"e_1_3_2_1_4_1","volume-title":"Google Coral BodyPix. Retrieved","author":"Brooks Michael","year":"2022","unstructured":"Michael Brooks, Naveen-Dodda, and Peter Malkin. 2021. Google Coral BodyPix. Retrieved October 3, 2022 from https:\/\/github.com\/google-coral\/project-bodypix.git"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 5th Conference on Machine Learning and Systems","author":"Cho Junguk","year":"2022","unstructured":"Junguk Cho, Diman Zad Tootaghaj, Lianjie Cao, and Puneet Sharma. 2022. SLA-Driven ML Inference Framework For Clouds With Heterogeneous Accelerators. In Proceedings of the 5th Conference on Machine Learning and Systems (Santa Clara, California, August 29 - September 1, 2022) (MLSys '22). 20--32. https:\/\/proceedings.mlsys.org\/paper\/2022\/file\/0777d5c17d4066b82ab86dff8a46af6f-Paper.pdf"},{"key":"e_1_3_2_1_6_1","volume-title":"Bin packing problem. Retrieved","author":"Wikipedia","year":"2022","unstructured":"Wikipedia contributors. 2022. Bin packing problem. Retrieved October 3, 2022 from https:\/\/en.wikipedia.org\/w\/index.php?title=Bin_packing_problem"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 14th USENIX Symposium on Networked Systems Design and Implementation","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J. Franklin, Joseph E. Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency Online Prediction Serving System. In Proceedings of the 14th USENIX Symposium on Networked Systems Design and Implementation (Boston, Massachusetts, March 27 - 29, 2017) (NSDI '17). USENIX, Berkeley, CA, USA, 613 -- 627. https:\/\/www.usenix.org\/conference\/nsdi17\/technical-sessions\/presentation\/crankshaw"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/75247.75248"},{"key":"e_1_3_2_1_9_1","volume-title":"Raspberry Pi 4 Model B. Retrieved","author":"Pi Foundation The Raspberry","year":"2022","unstructured":"The Raspberry Pi Foundation. 2022. Raspberry Pi 4 Model B. Retrieved October 3, 2022 from https:\/\/www.raspberrypi.com\/products\/raspberry-pi-4-model-b\/"},{"volume-title":"Co-compiling multiple models. Retrieved","year":"2022","key":"e_1_3_2_1_10_1","unstructured":"Google. 2020. Co-compiling multiple models. Retrieved October 3, 2022 from https:\/\/coral.ai\/docs\/edgetpu\/compiler\/#co-compiling-multiple-models"},{"volume-title":"Parameter data caching. Retrieved","year":"2022","key":"e_1_3_2_1_11_1","unstructured":"Google.2020. Parameter data caching. Retrieved October 3, 2022 from https:\/\/coral.ai\/docs\/edgetpu\/compiler\/#parameter-data-caching"},{"volume-title":"What is the Edge TPU? Retrieved","year":"2022","key":"e_1_3_2_1_12_1","unstructured":"Google. 2020. What is the Edge TPU? Retrieved October 3, 2022 from https:\/\/coral.ai\/docs\/edgetpu\/faq\/"},{"key":"e_1_3_2_1_13_1","unstructured":"Google. 2022. Vertex AI. Retrieved October 3 2022 from https:\/\/cloud.google.com\/vertex-ai"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation (Virtual, November 4 - 6","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like Clockwork: Performance Predictability from the Bottom Up. In Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation (Virtual, November 4 - 6, 2020) (OSDI '20). USENIX, Berkeley, CA, USA, 443 -- 462. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/gujarati"},{"volume-title":"Intel Neural Compute Stick 2. Retrieved","year":"2022","key":"e_1_3_2_1_15_1","unstructured":"Intel. 2022. Intel Neural Compute Stick 2. Retrieved October 3, 2022 from https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/hardware\/neural-compute-stick.html"},{"key":"e_1_3_2_1_16_1","volume-title":"Average Frame Rate Video Surveillance Statistics","author":"IPVM.","year":"2021","unstructured":"IPVM. 2021. Average Frame Rate Video Surveillance Statistics 2021. Retrieved October 3, 2022 fromhttps:\/\/ipvm.com\/reports\/average-frame-rate-video-surveillance-2021"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/SEC50012.2020.00016"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453142.3491283"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.14778\/3137628.3137664"},{"volume-title":"Assigning Pods to Nodes. Retrieved","year":"2022","key":"e_1_3_2_1_20_1","unstructured":"Kubernetes. 2022. Assigning Pods to Nodes. Retrieved October 3, 2022 from https:\/\/kubernetes.io\/docs\/concepts\/scheduling-eviction\/assign-pod-node\/"},{"volume-title":"Managing Resources for Containers. Retrieved","year":"2022","key":"e_1_3_2_1_21_1","unstructured":"Kubernetes. 2022. Managing Resources for Containers. Retrieved October 3, 2022 from https:\/\/kubernetes.io\/docs\/concepts\/configuration\/manage-resources-containers\/"},{"volume-title":"Production-Grade Container Orchestration. Retrieved","year":"2022","key":"e_1_3_2_1_22_1","unstructured":"Kubernetes. 2022. Production-Grade Container Orchestration. Retrieved October 3, 2022 from https:\/\/kubernetes.io\/"},{"key":"e_1_3_2_1_23_1","unstructured":"Kubernetes. 2022. Service. Retrieved October 3 2022 from https:\/\/kubernetes.io\/docs\/concepts\/services-networking\/service\/"},{"key":"e_1_3_2_1_24_1","volume-title":"CUDA Multi-process Service. Retrieved","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. 2020. CUDA Multi-process Service. Retrieved October 3, 2022 from https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf"},{"key":"e_1_3_2_1_25_1","volume-title":"Jetson Nano Developer Kit. Retrieved","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. 2022. Jetson Nano Developer Kit. Retrieved October 3, 2022 from https:\/\/developer.nvidia.com\/embedded\/jetson-nano-developer-kit"},{"key":"e_1_3_2_1_26_1","unstructured":"NVIDIA. 2022. Multi-Process Service. Retrieved October 3 2022 from https:\/\/docs.nvidia.com\/pdf\/CUDA_Multi_Process_Service_Overview.pdf"},{"key":"e_1_3_2_1_27_1","volume-title":"NVIDIA Container Toolkit. Retrieved","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. 2022. NVIDIA Container Toolkit. Retrieved October 3, 2022 from https:\/\/github.com\/NVIDIA\/nvidia-docker"},{"key":"e_1_3_2_1_28_1","volume-title":"NVIDIA Triton Inference Server. Retrieved","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. 2022. NVIDIA Triton Inference Server. Retrieved October 3, 2022 from https:\/\/developer.nvidia.com\/nvidia-triton-inference-server"},{"key":"e_1_3_2_1_29_1","first-page":"1","volume-title":"Workshop on ML Systems at NIPS 2017","author":"Olston Christopher","year":"2017","unstructured":"Christopher Olston, Noah Fiedel, Kiril Gorovoy, Jeremiah Harmsen, Li Lao, Fangwei Li, Vinu Rajashekhar, Sukriti Ramesh, and Jordan Soyke. 2017. Tensorflow-serving: Flexible, high-performance ml serving. In Workshop on ML Systems at NIPS 2017 (Long Beach, California, December 8, 2017). 8 pages. http:\/\/learningsys.org\/nips17\/assets\/papers\/paper_1.pdf"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_17"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00233"},{"volume-title":"Retrieved","year":"2020","key":"e_1_3_2_1_32_1","unstructured":"Python. 2020. Python roundrobin 0.0.2. Retrieved December 12, 2021 from https:\/\/pypi.org\/project\/roundrobin\/"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 2021 USENIX Annual Technical Conference (Virtual, July 14 - 16","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J Yadwadkar, and Christos Kozyrakis. 2021. INFaaS: Automated Model-less Inference Serving. In Proceedings of the 2021 USENIX Annual Technical Conference (Virtual, July 14 - 16, 2021) (ATC '21). USENIX, Berkeley, CA, USA, 397--411. https:\/\/www.usenix.org\/conference\/atc21\/presentation\/romero"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2017.9"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 2020 USENIX Annual Technical Conference (Virtual, July 15 - 17","author":"Shahrad Mohammad","year":"2020","unstructured":"Mohammad Shahrad, Rodrigo Fonseca, \u00cd\u00f1igo Goiri, Gohar Chaudhry, Paul Batum, Jason Cooke, Eduardo Laureano, Colby Tresness, Mark Russinovich, and Ricardo Bianchini. 2020. Serverless in the Wild: Characterizing and Optimizing the Serverless Workload at a Large Cloud Provider. In Proceedings of the 2020 USENIX Annual Technical Conference (Virtual, July 15 - 17, 2020) (ATC '20). USENIX, Berkeley, CA, USA, 205--218. https:\/\/www.usenix.org\/conference\/atc20\/presentation\/shahrad"},{"key":"e_1_3_2_1_37_1","unstructured":"TensorFlow. 2022. TensorFlow Hub. Retrieved October 3 2022 from https:\/\/tfhub.dev\/"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3423211.3425686"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453142.3491278"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419192"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2017.2674661"}],"event":{"name":"Middleware '22: 23rd International Middleware Conference","sponsor":["ACM Association for Computing Machinery","IFIP"],"location":"Quebec QC Canada","acronym":"Middleware '22"},"container-title":["Proceedings of the 23rd ACM\/IFIP International Middleware Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3528535.3565254","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3528535.3565254","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3528535.3565254","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:43Z","timestamp":1750186963000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3528535.3565254"}},"subtitle":["a multi-tenant edge cluster system architecture for scalable camera processing"],"short-title":[],"issued":{"date-parts":[[2022,11,7]]},"references-count":41,"alternative-id":["10.1145\/3528535.3565254","10.1145\/3528535"],"URL":"https:\/\/doi.org\/10.1145\/3528535.3565254","relation":{},"subject":[],"published":{"date-parts":[[2022,11,7]]},"assertion":[{"value":"2022-11-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}