{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:03:57Z","timestamp":1776107037724,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3662006.3662059","type":"proceedings-article","created":{"date-parts":[[2024,6,11]],"date-time":"2024-06-11T12:23:36Z","timestamp":1718108616000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["Large Language Models on Mobile Devices: Measurements, Analysis, and Insights"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5932-8258","authenticated-orcid":false,"given":"Xiang","family":"Li","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications (BUPT), China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0209-8421","authenticated-orcid":false,"given":"Zhenyan","family":"Lu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications (BUPT), China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2751-2500","authenticated-orcid":false,"given":"Dongqi","family":"Cai","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications (BUPT), China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9504-3702","authenticated-orcid":false,"given":"Xiao","family":"Ma","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications (BUPT), China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6271-6993","authenticated-orcid":false,"given":"Mengwei","family":"Xu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications (BUPT), China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Baichuan Models. https:\/\/huggingface.co\/baichuan-inc."},{"key":"e_1_3_2_1_2_1","volume-title":"d.]","year":"2023","unstructured":"[n. d.]. Koala: A Dialogue Model for Academic Research. https:\/\/bair.berkeley.edu\/blog\/2023\/04\/03\/koala\/."},{"key":"e_1_3_2_1_3_1","unstructured":"[n.d.]. Pygmalion\/Metharme 7B Model. https:\/\/huggingface.co\/PygmalionAI\/pygmalion-7b."},{"key":"e_1_3_2_1_4_1","volume-title":"d.]","year":"2023","unstructured":"[n. d.]. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90% ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/."},{"key":"e_1_3_2_1_5_1","unstructured":"[n.d.]. WizardLM Model. https:\/\/github.com\/nlpxucan\/WizardLM."},{"key":"e_1_3_2_1_6_1","unstructured":"2019. WinoGrande: An Adversarial Winograd Schema Challenge at Scale."},{"key":"e_1_3_2_1_7_1","unstructured":"n.d.. Android AICore. urlhttps:\/\/developer.android.com\/ml\/aicore. Accessed: 2024-04-06."},{"key":"e_1_3_2_1_8_1","unstructured":"Armen Aghajanyan Lili Yu Alexis Conneau Wei-Ning Hsu Karen Hambardzumyan et al. 2023. Scaling Laws for Generative Mixed-Modal Language Models. arXiv:2301.03728 [cs.CL]"},{"key":"e_1_3_2_1_9_1","volume-title":"SARATHI: Efficient LLM Inference by Piggybacking Decodes with Chunked Prefills. arXiv:2308.16369 [cs.LG]","author":"Agrawal Amey","year":"2023","unstructured":"Amey Agrawal, Ashish Panwar, Jayashree Mohan, Nipun Kwatra, Bhargav S. Gulavani, et al. 2023. SARATHI: Efficient LLM Inference by Piggybacking Decodes with Chunked Prefills. arXiv:2308.16369 [cs.LG]"},{"key":"e_1_3_2_1_10_1","volume-title":"Yi: Open Foundation Models by 01.AI. arXiv:2403.04652 [cs.CL]","author":"Alex Young AI","year":"2024","unstructured":"01. AI, Alex Young, Bei Chen, Chao Li, et al. 2024. Yi: Open Foundation Models by 01.AI. arXiv:2403.04652 [cs.CL]"},{"key":"e_1_3_2_1_11_1","unstructured":"Ebtesam Almazrouei Hamza Alobeidli Abdulaziz Alshamsi Alessandro Cappelli Ruxandra Cojocaru et al. 2023. Falcon-40B: an open large language model with state-of-the-art performance. (2023)."},{"key":"e_1_3_2_1_12_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang et al. 2023. QwenTechnical Report. arXiv preprint arXiv:2309.16609 (2023)."},{"key":"e_1_3_2_1_13_1","unstructured":"BigScience and Hugging Face. 2022. Introducing The World's Largest Open Multilingual Language Model: BLOOM. url-https:\/\/bigscience.huggingface.co\/blog\/bloom. Accessed: 2024-04-07."},{"key":"e_1_3_2_1_14_1","volume-title":"The-x: Privacy-preserving transformer inference with homomorphic encryption. arXiv preprint arXiv:2206.00216","author":"Chen Tianyu","year":"2022","unstructured":"Tianyu Chen, Hangbo Bao, Shaohan Huang, Li Dong, Binxing Jiao, et al. 2022. The-x: Privacy-preserving transformer inference with homomorphic encryption. arXiv preprint arXiv:2206.00216 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"Efficient and Effective Text Encoding for Chinese LLaMA and Alpaca. arXiv preprint arXiv:2304.08177","author":"Cui Yiming","year":"2023","unstructured":"Yiming Cui, Ziqing Yang, and Xin Yao. 2023. Efficient and Effective Text Encoding for Chinese LLaMA and Alpaca. arXiv preprint arXiv:2304.08177 (2023). https:\/\/arxiv.org\/abs\/2304.08177"},{"key":"e_1_3_2_1_16_1","unstructured":"Databricks. 2023. Introducing MPT-7B: A New Standard for Open-Source Commercially Usable LLMs. urlhttps:\/\/www.databricks.com\/blog\/mpt-7b. Accessed: 2024-04-06."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.joule.2023.09.004"},{"key":"e_1_3_2_1_18_1","unstructured":"DeepSeek-AI Xiao Bi Deli Chen Guanting Chen et al. 2024. DeepSeek LLM: Scaling Open-Source Language Models with Longtermism. arXiv:2401.02954 [cs.CL]"},{"key":"e_1_3_2_1_19_1","unstructured":"Erich Elsen Augustus Odena Maxwell Nye Sa\u011fnak Ta\u015firlar Tri Dao et al. 2023. Releasing Persimmon-8B. https:\/\/www.adept.ai\/blog\/persimmon-8b"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Thomas Mesnard Gemma Team Cassidy Hardin Robert Dadashi Surya Bhupatiraju Laurent Sifre et al. 2024. Gemma. (2024). https:\/\/doi.org\/10.34740\/KAGGLE\/M\/3301","DOI":"10.34740\/KAGGLE\/M\/3301"},{"key":"e_1_3_2_1_21_1","unstructured":"Georgi Gerganov. 2023. llama.cpp. urlhttps:\/\/github.com\/ggerganov\/llama.cpp."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of The 33rd International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"210","author":"Gilad-Bachrach Ran","year":"2016","unstructured":"Ran Gilad-Bachrach, Nathan Dowlin, Kim Laine, Kristin Lauter, Michael Naehrig, et al. 2016. CryptoNets: Applying Neural Networks to Encrypted Data with High Throughput and Accuracy. In Proceedings of The 33rd International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 48), Maria Florina Balcan and Kilian Q. Weinberger (Eds.). PMLR, New York, New York, USA, 201--210. https:\/\/proceedings.mlr.press\/v48\/gilad-bachrach16.html"},{"key":"e_1_3_2_1_23_1","unstructured":"Dan Hendrycks Collin Burns Steven Basart Andy Zou Mantas Mazeika et al. 2021. Measuring Massive Multitask Language Understanding. arXiv:2009.03300 [cs.CY]"},{"key":"e_1_3_2_1_24_1","volume-title":"Vigogne: French Instruction-following and Chat Models. https:\/\/github.com\/bofenghuang\/vigogne.","author":"Huang Bofeng","year":"2023","unstructured":"Bofeng Huang. 2023. Vigogne: French Instruction-following and Chat Models. https:\/\/github.com\/bofenghuang\/vigogne."},{"key":"e_1_3_2_1_25_1","unstructured":"Yuanchun Li Hao Wen Weijun Wang Xiangyu Li Yizhen Yuan et al. 2024. Personal LLM Agents: Insights and Survey about the Capability Efficiency and Security. arXiv preprint arXiv:2401.05459 (2024)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3592524"},{"key":"e_1_3_2_1_27_1","volume-title":"AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. arXiv:2306.00978 [cs.CL]","author":"Lin Ji","year":"2023","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Xingyu Dang, et al. 2023. AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. arXiv:2306.00978 [cs.CL]"},{"key":"e_1_3_2_1_28_1","unstructured":"mllm team. 2024. mllm. https:\/\/github.com\/UbiquitousLearning\/mllm"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2022.3148714"},{"key":"e_1_3_2_1_30_1","unstructured":"Qualcomm Technologies Inc. 2023. Snapdragon 8 Gen 3 Mobile Platform Product Brief. urlhttps:\/\/docs.qualcomm.com\/bundle\/publicresource\/87-71408-1_REV_C_Snapdragon_8_gen_3_Mobile_Platform_Product_Brief.pdf. Accessed: 2024-04-06."},{"key":"e_1_3_2_1_31_1","volume-title":"Beyond memorization: Violating privacy via inference with large language models. arXiv preprint arXiv:2310.07298","author":"Staab Robin","year":"2023","unstructured":"Robin Staab, Mark Vero, Mislav Balunovi\u0107, and Martin Vechev. 2023. Beyond memorization: Violating privacy via inference with large language models. arXiv preprint arXiv:2310.07298 (2023)."},{"key":"e_1_3_2_1_32_1","unstructured":"MLC team. 2023. MLC-LLM. https:\/\/github.com\/mlc-ai\/mlc-llm"},{"key":"e_1_3_2_1_33_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux et al. 2023. LLaMA: Open and Efficient Foundation Language Models. arXiv:2302.13971 [cs.CL]"},{"key":"e_1_3_2_1_34_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi et al. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs.CL]"},{"key":"e_1_3_2_1_35_1","unstructured":"Jonathan Tow Marco Bellagente Dakota Mahan and Carlos Riquelme. [n. d.]. StableLM 3B 4E1T. https:\/\/huggingface.co\/stabilityai\/stablelm-3b-4e1t"},{"key":"e_1_3_2_1_36_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones et al. 2023. Attention Is All You Need. arXiv:1706.03762 [cs.CL]"},{"key":"e_1_3_2_1_37_1","unstructured":"Jason Wei Yi Tay Rishi Bommasani Colin Raffel Barret Zoph et al. 2022. Emergent Abilities of Large Language Models. arXiv:2206.07682 [cs.CL]"},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"38099","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Ji Lin, Mickael Seznec, Hao Wu, Julien Demouth, et al. 2023. SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models. In Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 38087--38099. https:\/\/proceedings.mlr.press\/v202\/xiao23c.html"},{"key":"e_1_3_2_1_39_1","volume-title":"Llmcad: Fast and scalable on-device large language model inference. arXiv preprint arXiv:2309.04255","author":"Xu Daliang","year":"2023","unstructured":"Daliang Xu, Wangsong Yin, Xin Jin, Ying Zhang, Shiyun Wei, et al. 2023. Llmcad: Fast and scalable on-device large language model inference. arXiv preprint arXiv:2309.04255 (2023)."},{"key":"e_1_3_2_1_40_1","unstructured":"Mengwei Xu Wangsong Yin Dongqi Cai Rongjie Yi Daliang Xu et al. 2024. A survey of resource-efficient llm and multimodal foundation models. arXiv preprint arXiv:2401.08092 (2024)."},{"key":"e_1_3_2_1_41_1","unstructured":"Aiyuan Yang Bin Xiao Bingning Wang Borong Zhang Ce Bian et al. 2023. Baichuan 2: Open Large-scale Language Models. arXiv:2309.10305 [cs.CL]"},{"key":"e_1_3_2_1_42_1","volume-title":"LLM as a System Service on Mobile Devices. arXiv preprint arXiv:2403.11805","author":"Yin Wangsong","year":"2024","unstructured":"Wangsong Yin, Mengwei Xu, Yuanchun Li, and Xuanzhe Liu. 2024. LLM as a System Service on Mobile Devices. arXiv preprint arXiv:2403.11805 (2024)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Jinliang Yuan Chen Yang Dongqi Cai Shihe Wang Xin Yuan et al. 2024. Mobile Foundation Model as Firmware. (2024).","DOI":"10.1145\/3636534.3649361"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1472"},{"key":"e_1_3_2_1_45_1","unstructured":"Zejun Zhang Li Zhang Xin Yuan Anlan Zhang Mengwei Xu et al. 2024. A First Look at GPT Apps: Landscape and Vulnerability. arXiv preprint arXiv:2402.15105 (2024)."}],"event":{"name":"MOBISYS '24: The 22nd Annual International Conference on Mobile Systems, Applications and Services","location":"Minato-ku Tokyo Japan","acronym":"MOBISYS '24","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Workshop on Edge and Mobile Foundation Models"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3662006.3662059","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3662006.3662059","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T20:18:17Z","timestamp":1755980297000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3662006.3662059"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":45,"alternative-id":["10.1145\/3662006.3662059","10.1145\/3662006"],"URL":"https:\/\/doi.org\/10.1145\/3662006.3662059","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}