{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:08:07Z","timestamp":1778256487478,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T00:00:00Z","timestamp":1713398400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CPS-2103459, SHF-2210744"],"award-info":[{"award-number":["CPS-2103459, SHF-2210744"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,18]]},"DOI":"10.1145\/3603287.3651205","type":"proceedings-article","created":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T12:06:34Z","timestamp":1714219594000},"page":"69-76","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":30,"title":["An Empirical Analysis and Resource Footprint Study of Deploying Large Language Models on Edge Devices"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5467-7082","authenticated-orcid":false,"given":"Nobel","family":"Dhar","sequence":"first","affiliation":[{"name":"Kennesaw State University, Marietta, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8361-9025","authenticated-orcid":false,"given":"Bobin","family":"Deng","sequence":"additional","affiliation":[{"name":"Kennesaw State University, Marietta, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4396-0370","authenticated-orcid":false,"given":"Dan","family":"Lo","sequence":"additional","affiliation":[{"name":"Kennesaw State University, Marietta, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6446-5769","authenticated-orcid":false,"given":"Xiaofeng","family":"Wu","sequence":"additional","affiliation":[{"name":"City University of Macau, Macao, Macau"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3910-3536","authenticated-orcid":false,"given":"Liang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Kennesaw State University, Marietta, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8562-0492","authenticated-orcid":false,"given":"Kun","family":"Suo","sequence":"additional","affiliation":[{"name":"Kennesaw State University, Marietta, Georgia, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. Jetson AGX Orin. https:\/\/www.nvidia.com\/en-us\/autonomous-machines\/embedded-systems\/jetson-orin\/."},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. Port of Facebook's LLaMA Model in C\/C++. https:\/\/github.com\/ggerganov\/llama.cpp."},{"key":"e_1_3_2_1_3_1","unstructured":"2023. A New Foundation for AI on Android. https:\/\/android-developers.googleblog.com\/2023\/12\/a-new-foundation-for-ai-on-android.html."},{"key":"e_1_3_2_1_4_1","unstructured":"2023. Qualcomm Works with Meta to Enable On-device AI Applications Using Llama 2. https:\/\/www.qualcomm.com\/news\/releases\/2023\/07\/qualcomm-works-with-meta-to-enable-on-device-ai-applications-usi."},{"key":"e_1_3_2_1_5_1","unstructured":"2023. Samsung Looks Towards AI For The Galaxy S24. https:\/\/www.forbes.com\/sites\/ewanspence\/2023\/11\/13\/samsung-galaxys24-ultra-generative-ai-qualcomm-snapdragon-exynos-2400\/?sh=6a019d2b3fba."},{"key":"e_1_3_2_1_6_1","volume-title":"Mohammad Rastegari, and Mehrdad Farajtabar.","author":"Alizadeh Keivan","year":"2023","unstructured":"Keivan Alizadeh, Iman Mirzadeh, Dmitry Belenko, Karen Khatamifard, Minsik Cho, Carlo C Del Mundo, Mohammad Rastegari, and Mehrdad Farajtabar. 2023. LLM in a Flash: Efficient Large Language Model Inference with Limited Memory. arXiv:2312.11514 [cs.CL]"},{"key":"e_1_3_2_1_7_1","unstructured":"Tom B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. arXiv:2005.14165 [cs.CL]"},{"key":"e_1_3_2_1_8_1","unstructured":"Ellis Di Cataldo. 2023. OpenAI Stops New ChatGPT Plus Subscriptions Due to Demand. https:\/\/tech.co\/news\/openai-stops-new-chatgpt-plus-subscriptions"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2023.3318080"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2984887"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","unstructured":"Warren Gay. 2014. Raspberry Pi Hardware Reference. https:\/\/doi.org\/10.1007\/978-1-4842-0799-4","DOI":"10.1007\/978-1-4842-0799-4"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM53939.2023.10229043"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMLCN.2023.3309773"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3229556.3229562"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2020.102781"},{"key":"e_1_3_2_1_16_1","volume-title":"AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. arXiv preprint arXiv:2306.00978","author":"Lin Ji","year":"2023","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Xingyu Dang, and Song Han. 2023. AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. arXiv preprint arXiv:2306.00978 (2023)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2019.2922285"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.04.141"},{"key":"e_1_3_2_1_19_1","unstructured":"Pradeep Menon. 2023. Introduction to Large Language Models and the Transformer Architecture. https:\/\/rpradeepmenon.medium.com\/introduction-to-large-language-models-and-the-transformer-architecture-534408ed7e61."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00290"},{"key":"e_1_3_2_1_21_1","volume-title":"Yelysei Bondarenko, Mart Van Baalen, and Tijmen Blankevoort.","author":"Nagel Markus","year":"2021","unstructured":"Markus Nagel, Marios Fournarakis, Rana Ali Amjad, Yelysei Bondarenko, Mart Van Baalen, and Tijmen Blankevoort. 2021. A White Paper on Neural Network Quantization. arXiv preprint arXiv:2106.08295 (2021)."},{"key":"e_1_3_2_1_22_1","unstructured":"Liangxin Qian and Jun Zhao. 2023. User Association and Resource Allocation in Large Language Model Based Mobile Edge Computing System over Wireless Communications. arXiv:2310.17872 [cs.IT]"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.2978027"},{"key":"e_1_3_2_1_24_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs.CL]"},{"key":"e_1_3_2_1_25_1","volume-title":"Number of Internet of Things (IoT) Connected Devices Worldwide from 2019 to","author":"Vailshery Lionel Sujay","year":"2023","unstructured":"Lionel Sujay Vailshery. 2023. Number of Internet of Things (IoT) Connected Devices Worldwide from 2019 to 2023, with Forecasts from 2022 to 2030. https:\/\/www.statista.com\/statistics\/1183457\/iot-connected-devices-worldwide\/"},{"key":"e_1_3_2_1_26_1","volume-title":"Attention is all you Need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you Need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2904348"},{"key":"e_1_3_2_1_28_1","volume-title":"Federated Fine-Tuning of LLMs on the Very Edge: The Good, the Bad, the Ugly. arXiv preprint arXiv:2310.03150","author":"Woisetschl\u00e4ger Herbert","year":"2023","unstructured":"Herbert Woisetschl\u00e4ger, Alexander Isenko, Shiqiang Wang, Ruben Mayer, and Hans-Arno Jacobsen. 2023. Federated Fine-Tuning of LLMs on the Very Edge: The Good, the Bad, the Ugly. arXiv preprint arXiv:2310.03150 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"Edge Intelligence: Architectures, Challenges, and Applications. arXiv:2003.12172 [cs.NI]","author":"Xu Dianlei","year":"2020","unstructured":"Dianlei Xu, Tong Li, Yong Li, Xiang Su, Sasu Tarkoma, Tao Jiang, Jon Crowcroft, and Pan Hui. 2020. Edge Intelligence: Architectures, Challenges, and Applications. arXiv:2003.12172 [cs.NI]"},{"key":"e_1_3_2_1_30_1","volume-title":"Edgemoe: Fast on-device Inference of Moe-based Large Language Models. arXiv preprint arXiv:2308.14352","author":"Yi Rongjie","year":"2023","unstructured":"Rongjie Yi, Liwei Guo, Shiyun Wei, Ao Zhou, Shangguang Wang, and Mengwei Xu. 2023. Edgemoe: Fast on-device Inference of Moe-based Large Language Models. arXiv preprint arXiv:2308.14352 (2023)."},{"key":"e_1_3_2_1_31_1","volume-title":"Dual Grained Quantization: Efficient Fine-Grained Quantization for LLM. arXiv preprint arXiv:2310.04836","author":"Zhang Luoming","year":"2023","unstructured":"Luoming Zhang, Wen Fei, Weijia Wu, Yefei He, Zhenyu Lou, and Hong Zhou. 2023. Dual Grained Quantization: Efficient Fine-Grained Quantization for LLM. arXiv preprint arXiv:2310.04836 (2023)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2858384"}],"event":{"name":"ACM SE '24: 2024 ACM Southeast Conference","location":"Marietta GA USA","acronym":"ACM SE '24","sponsor":["ACM Association for Computing Machinery"]},"container-title":["Proceedings of the 2024 ACM Southeast Conference on ZZZ"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603287.3651205","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3603287.3651205","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,29]],"date-time":"2025-08-29T17:06:45Z","timestamp":1756487205000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603287.3651205"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,18]]},"references-count":32,"alternative-id":["10.1145\/3603287.3651205","10.1145\/3603287"],"URL":"https:\/\/doi.org\/10.1145\/3603287.3651205","relation":{},"subject":[],"published":{"date-parts":[[2024,4,18]]},"assertion":[{"value":"2024-04-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}