{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T13:13:35Z","timestamp":1776950015641,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","funder":[{"name":"NSF &#x28;National Science Foundation&#x29;","award":["2211302"],"award-info":[{"award-number":["2211302"]}]},{"name":"NSF &#x28;National Science Foundation&#x29;","award":["2211888"],"award-info":[{"award-number":["2211888"]}]},{"name":"NSF &#x28;National Science Foundation&#x29;","award":["2213636"],"award-info":[{"award-number":["2213636"]}]},{"name":"NSF &#x28;National Science Foundation&#x29;","award":["2105494"],"award-info":[{"award-number":["2105494"]}]},{"name":"NSF &#x28;National Science Foundation&#x29;","award":["23091241"],"award-info":[{"award-number":["23091241"]}]},{"name":"NSF &#x28;National Science Foundation&#x29;","award":["2325956"],"award-info":[{"award-number":["2325956"]}]},{"name":"Army Research Laboratory","award":["W911NF-17-2-0196"],"award-info":[{"award-number":["W911NF-17-2-0196"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,4]]},"DOI":"10.1145\/3777884.3797816","type":"proceedings-article","created":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T12:27:26Z","timestamp":1776947246000},"page":"333-340","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["To Offload or Not To Offload: Model-driven Comparison of Edge-native and On-device Processing In the Era of Accelerators"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8508-8124","authenticated-orcid":false,"given":"Nathan","family":"Ng","sequence":"first","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1722-4927","authenticated-orcid":false,"given":"David","family":"Irwin","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1439-332X","authenticated-orcid":false,"given":"Ananthram","family":"Swami","sequence":"additional","affiliation":[{"name":"DEVCOM Army Research Lab, Adelphi, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7808-7375","authenticated-orcid":false,"given":"Don","family":"Towsley","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5435-1901","authenticated-orcid":false,"given":"Prashant","family":"Shenoy","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, MA, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"SC'21","author":"Ahmed","unstructured":"Ahmed Ali-Eldin et al . 2021. The hidden cost of the edge: a performance compar- ison of edge and cloud latencies. In SC'21. ACM."},{"key":"e_1_3_2_1_2_1","volume-title":"Waiting Game: Optimally Provisioning Fixed Re- sources for Cloud-Enabled Schedulers. In SC'20","author":"Pradeep Ambati","year":"2020","unstructured":"Pradeep Ambati et al . 2020. Waiting Game: Optimally Provisioning Fixed Re- sources for Cloud-Enabled Schedulers. In SC'20. IEEE."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Amin Banitalebi-Dehkordi et al . 2021. Auto-Split: A General Framework of Collaborative Edge-Cloud AI. In KDD'21. ACM.","DOI":"10.1145\/3447548.3467078"},{"key":"e_1_3_2_1_4_1","volume-title":"INFOCOM'25","author":"Yichong","unstructured":"Yichong Chen et al . 2025. CEED: Collaborative Early Exit Neural Network Inference at the Edge. In INFOCOM'25."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1013367.1013397"},{"key":"e_1_3_2_1_6_1","unstructured":"Jon Dugan et al . 2024. iPerf - Network Bandwidth Measurement. https:\/\/iperf.fr\/"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Anshul Gandhi et al. 2019. Leveraging Queueing Theory and OS Profiling to Reduce Application Latency. In Middleware'19. ACM.","DOI":"10.1145\/3366625.3368853"},{"key":"e_1_3_2_1_8_1","unstructured":"Georgi Gerganov et al . 2023. llama.cpp. https:\/\/github.com\/ggml-org\/llama.cpp"},{"key":"e_1_3_2_1_9_1","unstructured":"Google. 2026. Coral Edge TPU. https:\/\/www.coral.ai\/products\/accelerator."},{"key":"e_1_3_2_1_10_1","unstructured":"Karthik Gopalakrishnan et al. 2023. Topical-Chat: Towards Knowledge-Grounded Open-Domain Conversations. arXiv:2308.11995"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/2462638"},{"key":"e_1_3_2_1_12_1","volume-title":"INFOCOM'19","author":"Chuang","unstructured":"Chuang Hu et al. 2019. Dynamic Adaptive DNN Surgery for Inference Accelera- tion on the Edge. In INFOCOM'19. IEEE."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Jin Huang et al. 2020. CLIO: enabling automatic compilation of deep learning pipelines across IoT and cloud. In MobiCom'20. ACM.","DOI":"10.1145\/3372224.3419215"},{"key":"e_1_3_2_1_14_1","unstructured":"Intel Corporation. 2025. The AI PC Powered by Intel is Here. https:\/\/www.intel. com\/content\/www\/us\/en\/products\/docs\/processors\/core-ultra\/ai-pc.html"},{"key":"e_1_3_2_1_15_1","volume-title":"NNICE'24","author":"Yong","unstructured":"Yong Ji et al. 2024. An Active Learning based Latency Prediction Approach for Neural Network Architecture. In NNICE'24."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Lili Jiang et al. 2021. Performance analysis of heterogeneous cloud-edge services: A modeling approach. Peer-to-Peer Networking and Applications 14 (01 2021).","DOI":"10.1007\/s12083-020-00968-5"},{"key":"e_1_3_2_1_17_1","first-page":"1","article-title":"Neurosurgeon: Collaborative Intelligence Between the Cloud and Mobile Edge","volume":"45","author":"Yiping Kang","year":"2017","unstructured":"Yiping Kang et al . 2017. Neurosurgeon: Collaborative Intelligence Between the Cloud and Mobile Edge. SIGARCH Comput. Archit. News 45, 1 (apr 2017).","journal-title":"SIGARCH Comput. Archit. News"},{"key":"e_1_3_2_1_18_1","volume-title":"Furion: Engineering High-Quality Immersive Virtual Reality on Today's Mobile Devices. In MobiCom'17. ACM.","author":"Zeqi Lai","year":"2017","unstructured":"Zeqi Lai et al . 2017. Furion: Engineering High-Quality Immersive Virtual Reality on Today's Mobile Devices. In MobiCom'17. ACM."},{"key":"e_1_3_2_1_19_1","volume-title":"Predicting Inference Latency of Neural Architectures on Mobile Devices. In ICPE'23","author":"Zhuojin","unstructured":"Zhuojin Li et al . 2023. Predicting Inference Latency of Neural Architectures on Mobile Devices. In ICPE'23. ACM."},{"key":"e_1_3_2_1_20_1","article-title":"Energy-efficient offloading for DNN-based applications in edge-cloud computing: A hybrid chaotic evolutionary approach","author":"Zengpeng Li","year":"2024","unstructured":"Zengpeng Li et al. 2024. Energy-efficient offloading for DNN-based applications in edge-cloud computing: A hybrid chaotic evolutionary approach. J. Parallel Distrib. Comput. 187, C (May 2024).","journal-title":"J. Parallel Distrib. Comput. 187, C"},{"key":"e_1_3_2_1_21_1","article-title":"Model-driven Cluster Resource Management for AI Workloads in Edge Clouds. ACM","author":"Qianlin Liang","year":"2023","unstructured":"Qianlin Liang et al. 2023. Model-driven Cluster Resource Management for AI Workloads in Edge Clouds. ACM Trans. Auton. Adapt. Syst. (March 2023).","journal-title":"Trans. Auton. Adapt. Syst."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Dumitrel Loghin et al. 2019. Towards Analyzing the Performance of Hybrid Edge-Cloud Processing. In EDGE'19.","DOI":"10.1109\/EDGE.2019.00029"},{"key":"e_1_3_2_1_23_1","volume-title":"Characterizing the Execution of Deep Neural Networks on Collaborative Robots and Edge Devices. In PEARC'19","author":"Matthew L.","unstructured":"Matthew L. Merck et al. 2019. Characterizing the Execution of Deep Neural Networks on Collaborative Robots and Edge Devices. In PEARC'19. ACM."},{"key":"e_1_3_2_1_24_1","unstructured":"Meta. 2024. Llama-3.2--1B. https:\/\/huggingface.co\/meta-llama\/Llama-3.2--1B"},{"key":"e_1_3_2_1_25_1","first-page":"4","article-title":"Human-in-the-loop machine learning: a state of the art","volume":"56","author":"Eduardo Mosqueira-Rey","year":"2022","unstructured":"Eduardo Mosqueira-Rey et al. 2022. Human-in-the-loop machine learning: a state of the art. Artif. Intell. Rev. 56, 4 (Aug. 2022).","journal-title":"Artif. Intell. Rev."},{"key":"e_1_3_2_1_26_1","unstructured":"Netatmo. 2026. Smart Camera. https:\/\/www.netatmo.com\/smart-outdoor- camera."},{"key":"e_1_3_2_1_27_1","unstructured":"Nathan Ng et al . 2025. To Offload or Not To Offload: Model-driven Com- parison of Edge-native and On-device Processing In the Era of Accelerators. arXiv:2504.15162"},{"key":"e_1_3_2_1_28_1","unstructured":"NVIDIA. 2017. JETSON TX2: High Performance AI at the Edge. https:\/\/www. nvidia.com\/en-us\/autonomous-machines\/embedded-systems\/jetson-tx2\/."},{"key":"e_1_3_2_1_29_1","unstructured":"NVIDIA. 2021. A2 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data- center\/products\/a2\/"},{"key":"e_1_3_2_1_30_1","unstructured":"NVIDIA. 2023. Accelerated Inference for Large Transformer Models Using NVIDIA FasterTransformer. https:\/\/github.com\/NVIDIA\/FasterTransformer\/."},{"key":"e_1_3_2_1_31_1","unstructured":"NVIDIA. 2023. Jetson Orin Nano Developer Kit. https:\/\/developer.nvidia.com\/embedded\/learn\/get-started-jetson-orin-nano-devkit."},{"key":"e_1_3_2_1_32_1","unstructured":"NVIDIA. 2024. NVIDIA System Management Interface (nvidia-smi). https: \/\/developer.nvidia.com\/system-management-interface."},{"key":"e_1_3_2_1_33_1","unstructured":"NVIDIA. 2025. Multi-Instance GPU (MIG). https:\/\/www.nvidia.com\/en-us\/ technologies\/multi-instance-gpu\/."},{"key":"e_1_3_2_1_34_1","volume-title":"Ogden et al","author":"Samuel S.","year":"2021","unstructured":"Samuel S. Ogden et al . 2021. PieSlicer: Dynamically Improving Response Time for Cloud-based CNN Inference. In ICPE'21."},{"key":"e_1_3_2_1_35_1","volume-title":"Ogden et al","author":"Samuel S.","year":"2023","unstructured":"Samuel S. Ogden et al. 2023. Layercake: Efficient Inference Serving with Cloud and Mobile Resources. In CCGrid'23. IEEE."},{"key":"e_1_3_2_1_36_1","volume-title":"Introduction to Probability, Statistics, and Random Processes","author":"Hossein Pishro-Nik","unstructured":"Hossein Pishro-Nik. 2014. Introduction to Probability, Statistics, and Random Processes. Springer."},{"key":"e_1_3_2_1_37_1","volume-title":"ISCA'20","author":"Vijay Janapa","unstructured":"Vijay Janapa Reddi et al. 2020. MLPerf inference benchmark. In ISCA'20. IEEE."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Jinke Ren et al. 2019. An Edge-Computing Based Architecture for Mobile Aug- mented Reality. IEEE Network 33 4 (2019).","DOI":"10.1109\/MNET.2018.1800132"},{"key":"e_1_3_2_1_39_1","unstructured":"Tom S. 2017. Apple's Neural Engine Infuses the iPhone with AI. https:\/\/www. wired.com\/story\/apples-neural-engine-infuses-the-iphone-with-ai-smarts\/"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Mahadev Satyanarayanan et al . 2009. The Case for VM-Based Cloudlets in Mobile Computing. IEEE Pervasive Computing 8 4 (2009).","DOI":"10.1109\/MPRV.2009.82"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Mahadev Satyanarayanan et al. 2019. The Seminal Role of Edge-Native Applica- tions. In EDGE'19. IEEE.","DOI":"10.1109\/EDGE.2019.00022"},{"key":"e_1_3_2_1_42_1","unstructured":"Rana Shahout et al. 2024. Don't Stop Me Now: Embedding Based Scheduling for LLMs. arXiv:2410.01035"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Uma Tadakamalla et al. 2018. FogQN: An Analytic Model for Fog\/Cloud Com- puting. In UCC Companion'18.","DOI":"10.1109\/UCC-Companion.2018.00073"},{"key":"e_1_3_2_1_44_1","volume-title":"SIGMETRICS'05","author":"Bhuvan","unstructured":"Bhuvan Urgaonkar et al. 2005. An analytical model for multi-tier internet services and its applications. In SIGMETRICS'05. ACM."},{"key":"e_1_3_2_1_45_1","unstructured":"Vicuna team. 2024. ShareGPT Dataset Collection. https:\/\/huggingface.co\/datasets\/ anon8231489123\/ShareGPT_Vicuna_unfiltered."},{"key":"e_1_3_2_1_46_1","volume-title":"INVAR: Inversion Aware Resource Provisioning and Workload Scheduling for Edge Computing. In INFOCOM'24","author":"Bin Wang","year":"2024","unstructured":"Bin Wang et al . 2024. INVAR: Inversion Aware Resource Provisioning and Workload Scheduling for Edge Computing. In INFOCOM'24. IEEE."},{"key":"e_1_3_2_1_47_1","unstructured":"Shuochao Yao et al. 2018. FastDeepIoT: Towards Understanding and Optimizing Neural Network Execution Time on Mobile and Embedded Devices. In SenSys'18."},{"key":"e_1_3_2_1_48_1","volume-title":"FRESCO: Fast and Reliable Edge Offloading with Reputation- based Hybrid Smart Contracts","author":"Josip Zilic","year":"2025","unstructured":"Josip Zilic et al . 2025. FRESCO: Fast and Reliable Edge Offloading with Reputation- based Hybrid Smart Contracts. IEEE Trans. Services Comput. (2025)."}],"event":{"name":"ICPE '26: 17th ACM\/SPEC International Conference on Performance Engineering","location":"Florence Italy","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","SIGMETRICS ACM Special Interest Group on Measurement and Evaluation","SPEC"]},"container-title":["Proceedings of the 17th ACM\/SPEC International Conference on Performance Engineering"],"original-title":[],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T12:27:41Z","timestamp":1776947261000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3777884.3797816"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,3]]},"references-count":48,"alternative-id":["10.1145\/3777884.3797816","10.1145\/3777884"],"URL":"https:\/\/doi.org\/10.1145\/3777884.3797816","relation":{},"subject":[],"published":{"date-parts":[[2026,5,3]]},"assertion":[{"value":"2026-05-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}