{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:46:35Z","timestamp":1774021595279,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,6]],"date-time":"2023-12-06T00:00:00Z","timestamp":1701820800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,6]]},"DOI":"10.1145\/3583740.3628443","type":"proceedings-article","created":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T18:35:50Z","timestamp":1723055750000},"page":"27-39","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["RAVAS: Interference-Aware Model Selection and Resource Allocation for Live Edge Video Analytics"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9249-1633","authenticated-orcid":false,"given":"Ali","family":"Rahmanian","sequence":"first","affiliation":[{"name":"Ume\u00e5 University, Ume\u00e5, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2774-9284","authenticated-orcid":false,"given":"Ahmed","family":"Ali-Eldin","sequence":"additional","affiliation":[{"name":"Chalmers University of Technology, Gothenburg, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3166-506X","authenticated-orcid":false,"given":"Selome Kostentinos","family":"Tesfatsion","sequence":"additional","affiliation":[{"name":"Ericsson Research, Stockholm, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7499-6213","authenticated-orcid":false,"given":"Bj\u00f6rn","family":"Skubic","sequence":"additional","affiliation":[{"name":"Ericsson Research, Stockholm, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3498-1540","authenticated-orcid":false,"given":"Harald","family":"Gustafsson","sequence":"additional","affiliation":[{"name":"Ericsson Research, Stockholm, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5435-1901","authenticated-orcid":false,"given":"Prashant","family":"Shenoy","sequence":"additional","affiliation":[{"name":"University of Massachusetts, Amherst, Massachusetts, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2633-6798","authenticated-orcid":false,"given":"Erik","family":"Elmroth","sequence":"additional","affiliation":[{"name":"Ume\u00e5 university, Ume\u00e5, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934","author":"Bochkovskiy Alexey","year":"2020","unstructured":"Alexey Bochkovskiy, Chien-Yao Wang, and Hong-Yuan Mark Liao. Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934, 2020."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_3_1","first-page":"6105","volume-title":"International conference on machine learning","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning, pages 6105--6114. PMLR, 2019."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2017.3641638"},{"key":"e_1_3_2_1_5_1","first-page":"119","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Bhardwaj Romil","year":"2022","unstructured":"Romil Bhardwaj, Zhengxu Xia, Ganesh Ananthanarayanan, Yuanchao Shu, Nikolaos Karianakis, Kevin Hsieh, Paramvir Bahl, and Ion Stoica. Ekya: Continuous learning of video analytics models on edge compute servers. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22), pages 119--135, 2022."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230574"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1109\/IISWC50251.2020.00023","volume-title":"2020 IEEE International Symposium on Workload Characterization (IISWC)","author":"Liang Qianlin","year":"2020","unstructured":"Qianlin Liang, Prashant Shenoy, and David Irwin. Ai on the edge: Characterizing ai-based iot applications using specialized edge architectures. In 2020 IEEE International Symposium on Workload Characterization (IISWC), pages 145--156, 2020."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42613.2021.9365803"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793600"},{"key":"e_1_3_2_1_10_1","first-page":"359","volume-title":"Proceedings of the Annual Conference of the ACM Special Interest Group on Data Communication on the Applications, Technologies, Architectures, and Protocols for Computer Communication, SIGCOMM '20","author":"Li Yuanqi","year":"2020","unstructured":"Yuanqi Li, Arthi Padmanabhan, Pengzhan Zhao, Yufei Wang, Guoqing Harry Xu, and Ravi Netravali. Reducto: On-camera filtering for resource-efficient real-time video analytics. In Proceedings of the Annual Conference of the ACM Special Interest Group on Data Communication on the Applications, Technologies, Architectures, and Protocols for Computer Communication, SIGCOMM '20, page 359--376, New York, NY, USA, 2020. Association for Computing Machinery."},{"key":"e_1_3_2_1_11_1","first-page":"406","article-title":"Scaling video analytics on constrained edge nodes","volume":"1","author":"Canel Christopher","year":"2019","unstructured":"Christopher Canel, Thomas Kim, Giulio Zhou, Conglong Li, Hyeontaek Lim, David G Andersen, Michael Kaminsky, and Subramanya Dulloor. Scaling video analytics on constrained edge nodes. Proceedings of Machine Learning and Systems, 1:406--417, 2019.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517835"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230554"},{"key":"e_1_3_2_1_14_1","volume-title":"Noscope: optimizing neural network queries over video at scale. arXiv preprint arXiv:1703.02529","author":"Kang Daniel","year":"2017","unstructured":"Daniel Kang, John Emmons, Firas Abuzaid, Peter Bailis, and Matei Zaharia. Noscope: optimizing neural network queries over video at scale. arXiv preprint arXiv:1703.02529, 2017."},{"key":"e_1_3_2_1_15_1","first-page":"1","volume-title":"2020 IEEE 28th International Conference on Network Protocols (ICNP)","author":"Dhakal Aditya","year":"2020","unstructured":"Aditya Dhakal, Sameer G Kulkarni, and K. K. Ramakrishnan. Machine learning at the edge: Efficient utilization of limited cpu\/gpu resources by multiplexing. In 2020 IEEE 28th International Conference on Network Protocols (ICNP), pages 1--6, 2020."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1109\/CLOUD53861.2021.00018","volume-title":"2021 IEEE 14th International Conference on Cloud Computing (CLOUD)","author":"Dhakal Aditya","year":"2021","unstructured":"Aditya Dhakal, Sameer G Kulkarni, and K. K. Ramakrishnan. Primitives enhancing gpu runtime support for improved dnn performance. In 2021 IEEE 14th International Conference on Cloud Computing (CLOUD), pages 53--64, 2021."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476142"},{"key":"e_1_3_2_1_18_1","first-page":"228","volume-title":"Proceedings of the 23rd ACM\/IFIP International Middleware Conference, Middleware '22","author":"Dhakal Aditya","year":"2022","unstructured":"Aditya Dhakal, K. K. Ramakrishnan, Sameer G. Kulkarni, Puneet Sharma, and Junguk Cho. Slice-tune: A system for high performance dnn autotuning. In Proceedings of the 23rd ACM\/IFIP International Middleware Conference, Middleware '22, page 228--240, New York, NY, USA, 2022. Association for Computing Machinery."},{"key":"e_1_3_2_1_19_1","first-page":"492","volume-title":"Proceedings of the 11th ACM Symposium on Cloud Computing, SoCC '20","author":"Dhakal Aditya","year":"2020","unstructured":"Aditya Dhakal, Sameer G Kulkarni, and K. K. Ramakrishnan. Gslice: Controlled spatial sharing of gpus for a scalable inference platform. In Proceedings of the 11th ACM Symposium on Cloud Computing, SoCC '20, page 492--506, New York, NY, USA, 2020. Association for Computing Machinery."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155524"},{"key":"e_1_3_2_1_21_1","first-page":"353","volume-title":"2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Wan Chengcheng","year":"2020","unstructured":"Chengcheng Wan, Muhammad Santriaji, Eri Rogers, Henry Hoffmann, Michael Maire, and Shan Lu. ALERT: Accurate learning for energy and timeliness. In 2020 USENIX Annual Technical Conference (USENIX ATC 20), pages 353--369. USENIX Association, July 2020."},{"key":"e_1_3_2_1_22_1","first-page":"377","volume-title":"14th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 17)","author":"Zhang Haoyu","year":"2017","unstructured":"Haoyu Zhang, Ganesh Ananthanarayanan, Peter Bodik, Matthai Philipose, Paramvir Bahl, and Michael J Freedman. Live video analytics at scale with approximation and delay-tolerance. In 14th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 17), pages 377--392, 2017."},{"key":"e_1_3_2_1_23_1","first-page":"579","volume-title":"OSDI'18","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Meghan Cowan, Haichen Shen, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. Tvm: An automated end-to-end optimizing compiler for deep learning. OSDI'18, page 579--594, USA, 2018. USENIX Association."},{"key":"e_1_3_2_1_24_1","volume-title":"Optimizing deep learning inference on embedded systems through adaptive model selection. ACM Trans. Embed. Comput. Syst., 19(1), feb","author":"Marco Vicent Sanz","year":"2020","unstructured":"Vicent Sanz Marco, Ben Taylor, Zheng Wang, and Yehia Elkhatib. Optimizing deep learning inference on embedded systems through adaptive model selection. ACM Trans. Embed. Comput. Syst., 19(1), feb 2020."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3004571"},{"key":"e_1_3_2_1_26_1","first-page":"269","volume-title":"13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18)","author":"Hsieh Kevin","year":"2018","unstructured":"Kevin Hsieh, Ganesh Ananthanarayanan, Peter Bodik, Shivaram Venkataraman, Paramvir Bahl, Matthai Philipose, Phillip B Gibbons, and Onur Mutlu. Focus: Querying large video datasets with low latency and low cost. In 13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18), pages 269--286, 2018."},{"key":"e_1_3_2_1_27_1","volume-title":"Programmable inference accelerator","author":"Nvidia","year":"2018","unstructured":"Nvidia tensorrt: Programmable inference accelerator, 2018. https:\/\/developer.nvidia.com\/tensorrt\/."},{"key":"e_1_3_2_1_28_1","unstructured":"Tensorrt demos. https:\/\/github.com\/jkjung-avt\/tensorrt_demos\/."},{"key":"e_1_3_2_1_29_1","unstructured":"Ffmpeg. https:\/\/ffmpeg.org\/ffmpeg-formats.html\/."},{"key":"e_1_3_2_1_30_1","unstructured":"Prometheus. https:\/\/prometheus.io\/."},{"key":"e_1_3_2_1_31_1","unstructured":"Nvidia gpu prometheus exporter. https:\/\/github.com\/mindprince\/nvidia_gpu_prometheus_exporter\/."},{"key":"e_1_3_2_1_32_1","unstructured":"Prometheus operator. https:\/\/github.com\/prometheus-operator\/prometheus-operator."},{"key":"e_1_3_2_1_33_1","unstructured":"Prometheus client library. https:\/\/github.com\/prometheus\/client_python\/."},{"key":"e_1_3_2_1_34_1","unstructured":"Prometheus http api. https:\/\/prometheus.io\/docs\/prometheus\/latest\/querying\/api\/."},{"key":"e_1_3_2_1_35_1","unstructured":"Prometheus query language. https:\/\/prometheus.io\/docs\/prometheus\/latest\/querying\/basics\/."},{"key":"e_1_3_2_1_36_1","unstructured":"The virat video dataset. https:\/\/viratdata.org\/."},{"key":"e_1_3_2_1_37_1","first-page":"377","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Zhang Haoyu","year":"2017","unstructured":"Haoyu Zhang, Ganesh Ananthanarayanan, Peter Bodik, Matthai Philipose, Paramvir Bahl, and Michael J. Freedman. Live video analytics at scale with approximation and Delay-Tolerance. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17), pages 377--392, Boston, MA, March 2017. USENIX Association."},{"key":"e_1_3_2_1_38_1","first-page":"426","volume-title":"Proceedings of the 19th ACM Conference on Embedded Networked Sensor Systems, SenSys '21","author":"Nigade Vinod","year":"2021","unstructured":"Vinod Nigade, Ramon Winder, Henri Bal, and Lin Wang. Better never than late: Timely edge video analytics over the air. In Proceedings of the 19th ACM Conference on Embedded Networked Sensor Systems, SenSys '21, page 426--432, New York, NY, USA, 2021. Association for Computing Machinery."},{"key":"e_1_3_2_1_39_1","volume-title":"12th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 20)","author":"Zhang Jeff","year":"2020","unstructured":"Jeff Zhang, Sameh Elnikety, Shuayb Zarar, Atul Gupta, and Siddharth Garg. Model-Switching: Dealing with fluctuating workloads in Machine-Learning-asa-Service systems. In 12th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 20). USENIX Association, July 2020."},{"key":"e_1_3_2_1_40_1","first-page":"613","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J. Franklin, Joseph E. Gonzalez, and Ion Stoica. Clipper: A Low-Latency online prediction serving system. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17), pages 613--627, Boston, MA, March 2017. USENIX Association."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8485905"},{"key":"e_1_3_2_1_42_1","first-page":"397","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Qian Li, Neeraja J. Yadwadkar, and Christos Kozyrakis. INFaaS: Automated model-less inference serving. In 2021 USENIX Annual Technical Conference (USENIX ATC 21), pages 397--411. USENIX Association, July 2021."}],"event":{"name":"SEC '23: Eighth ACM\/IEEE Symposium on Edge Computing","location":"Wilmington DE USA","acronym":"SEC '23","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","IEEE Computer Society"]},"container-title":["Proceedings of the Eighth ACM\/IEEE Symposium on Edge Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583740.3628443","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3583740.3628443","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:29Z","timestamp":1750178789000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583740.3628443"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,6]]},"references-count":42,"alternative-id":["10.1145\/3583740.3628443","10.1145\/3583740"],"URL":"https:\/\/doi.org\/10.1145\/3583740.3628443","relation":{},"subject":[],"published":{"date-parts":[[2023,12,6]]},"assertion":[{"value":"2024-08-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}