{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T20:10:44Z","timestamp":1759435844005,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":100,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T00:00:00Z","timestamp":1758758400000},"content-version":"vor","delay-in-days":94,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2431611"],"award-info":[{"award-number":["2431611"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,23]]},"DOI":"10.1145\/3711875.3729141","type":"proceedings-article","created":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T19:30:22Z","timestamp":1759433422000},"page":"138-153","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EdgeLoRA: An Efficient Multi-Tenant LLM Serving System on Edge Devices"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-3542-0713","authenticated-orcid":false,"given":"Zheyu","family":"Shen","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4675-7733","authenticated-orcid":false,"given":"Yexiao","family":"He","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, 
USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9810-464X","authenticated-orcid":false,"given":"Ziyao","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9511-120X","authenticated-orcid":false,"given":"Yuning","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4346-8516","authenticated-orcid":false,"given":"Guoheng","family":"Sun","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7064-2335","authenticated-orcid":false,"given":"Wanghao","family":"Ye","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4990-1729","authenticated-orcid":false,"given":"Ang","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,9,25]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Marah Abdin Jyoti Aneja Hany Awadalla Ahmed Awadallah Ammar Ahmad Awan Nguyen Bach Amit Bahree Arash Bakhtiari Jianmin Bao Harkirat Behl Alon Benhaim Misha Bilenko Johan Bjorck S\u00e9bastien Bubeck Martin Cai Qin Cai Vishrav Chaudhary Dong Chen Dongdong Chen Weizhu Chen Yen-Chun Chen Yi-Ling Chen Hao Cheng Parul Chopra Xiyang Dai Matthew Dixon Ronen Eldan Victor Fragoso Jianfeng Gao Mei Gao Min Gao Amit Garg Allie Del Giorno Abhishek Goswami Suriya Gunasekar Emman Haider Junheng Hao Russell J. 
Hewett Wenxiang Hu Jamie Huynh Dan Iter Sam Ade Jacobs Mojan Javaheripi Xin Jin Nikos Karampatziakis Piero Kauffmann Mahoud Khademi Dongwoo Kim Young Jin Kim Lev Kurilenko James R. Lee Yin Tat Lee Yuanzhi Li Yunsheng Li Chen Liang Lars Liden Xihui Lin Zeqi Lin Ce Liu Liyuan Liu Mengchen Liu Weishung Liu Xiaodong Liu Chong Luo Piyush Madan Ali Mahmoudzadeh David Majercak Matt Mazzola Caio C\u00e9sar Teodoro Mendes Arindam Mitra Hardik Modi Anh Nguyen Brandon Norick Barun Patra Daniel Perez-Becker Thomas Portet Reid Pryzant Heyang Qin Marko Radmilac Liliang Ren Gustavo de Rosa Corby Rosset Sambudha Roy Olatunji Ruwase Olli Saarikivi Amin Saied Adil Salim Michael Santacroce Shital Shah Ning Shang Hiteshi Sharma Yelong Shen Swadheen Shukla Xia Song Masahiro Tanaka Andrea Tupini Praneetha Vaddamanu Chunyu Wang Guanhua Wang Lijuan Wang Shuohang Wang Xin Wang Yu Wang Rachel Ward Wen Wen Philipp Witte Haiping Wu Xiaoxia Wu Michael Wyatt Bin Xiao Can Xu Jiahang Xu Weijian Xu Jilong Xue Sonali Yadav Fan Yang Jianwei Yang Yifan Yang Ziyi Yang Donghan Yu Lu Yuan Chenruidong Zhang Cyril Zhang Jianwen Zhang Li Lyna Zhang Yi Zhang Yue Zhang Yunan Zhang and Xiren Zhou. 2024. Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone. arXiv:2404.14219 [cs.CL] https:\/\/arxiv.org\/abs\/2404.14219"},{"key":"e_1_3_2_1_2_1","unstructured":"Daniel Adiwardana Minh-Thang Luong David R So Jamie Hall Noah Fiedel Romal Thoppilan Zi Yang Apoorv Kulshreshtha Gaurav Nemade Yifeng Lu et al. 2020. Towards a human-like open-domain chatbot. arXiv preprint arXiv:2001.09977 (2020)."},{"key":"e_1_3_2_1_3_1","unstructured":"Anthropic. 2023. Claude 3 Model Card. 
https:\/\/www.anthropic.com\/model-card-claude-3 Accessed: 2024-12-03."},{"key":"e_1_3_2_1_4_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang Binyuan Hui Luo Ji Mei Li Junyang Lin Runji Lin Dayiheng Liu Gao Liu Chengqiang Lu Keming Lu Jianxin Ma Rui Men Xingzhang Ren Xuancheng Ren Chuanqi Tan Sinan Tan Jianhong Tu Peng Wang Shijie Wang Wei Wang Shengguang Wu Benfeng Xu Jin Xu An Yang Hao Yang Jian Yang Shusheng Yang Yang Yao Bowen Yu Hongyi Yuan Zheng Yuan Jianwei Zhang Xingxuan Zhang Yichang Zhang Zhenru Zhang Chang Zhou Jingren Zhou Xiaohuan Zhou and Tianhang Zhu. 2023. Qwen Technical Report. arXiv:2309.16609 [cs.CL] https:\/\/arxiv.org\/abs\/2309.16609"},{"key":"e_1_3_2_1_5_1","volume-title":"Mansheej Paul, Philip Greengard, Connor Jennings, Daniel King, Sam Havens, Vitaliy Chiley, Jonathan Frankle, Cody Blakeney, and John P. Cunningham.","author":"Biderman Dan","year":"2024","unstructured":"Dan Biderman, Jacob Portes, Jose Javier Gonzalez Ortiz, Mansheej Paul, Philip Greengard, Connor Jennings, Daniel King, Sam Havens, Vitaliy Chiley, Jonathan Frankle, Cody Blakeney, and John P. Cunningham. 2024. LoRA Learns Less and Forgets Less. arXiv:2405.09673 [cs.LG] https:\/\/arxiv.org\/abs\/2405.09673"},{"key":"e_1_3_2_1_6_1","unstructured":"Raffaello Bonghi. 2023. Jetson-Stats. https:\/\/github.com\/rbonghi\/jetson_stats. Accessed: 2024-12-07."},{"key":"e_1_3_2_1_7_1","volume-title":"Distributed inference and fine-tuning of large language models over the internet. Advances in neural information processing systems 36","author":"Borzunov Alexander","year":"2023","unstructured":"Alexander Borzunov, Max Ryabinin, Artem Chumachenko, Dmitry Baranchuk, Tim Dettmers, Younes Belkada, Pavel Samygin, and Colin A Raffel. 2023. Distributed inference and fine-tuning of large language models over the internet. 
Advances in neural information processing systems 36 (2023), 12312\u201312331."},{"key":"e_1_3_2_1_8_1","volume-title":"Vincent J Della Pietra, Frederick Jelinek, John Lafferty, Robert L Mercer, and Paul S Roossin.","author":"Brown Peter F","year":"1990","unstructured":"Peter F Brown, John Cocke, Stephen A Della Pietra, Vincent J Della Pietra, Frederick Jelinek, John Lafferty, Robert L Mercer, and Paul S Roossin. 1990. A statistical approach to machine translation. Computational linguistics 16, 2 (1990), 79\u201385."},{"key":"e_1_3_2_1_9_1","unstructured":"Tom B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. arXiv:2005.14165 [cs.CL] https:\/\/arxiv.org\/abs\/2005.14165"},{"key":"e_1_3_2_1_10_1","unstructured":"Rickard Br\u00fcel-Gabrielsson Jiacheng Zhu Onkar Bhardwaj Leshem Choshen Kristjan Greenewald Mikhail Yurochkin and Justin Solomon. 2024. Compress then Serve: Serving Thousands of LoRA Adapters with Little Overhead. arXiv:2407.00066 [cs.DC] https:\/\/arxiv.org\/abs\/2407.00066"},{"key":"e_1_3_2_1_11_1","volume-title":"PipeInfer: Accelerating LLM Inference using Asynchronous Pipelined Speculation. arXiv preprint arXiv:2407.11798","author":"Butler Branden","year":"2024","unstructured":"Branden Butler, Sixing Yu, Arya Mazaheri, and Ali Jannesari. 2024. PipeInfer: Accelerating LLM Inference using Asynchronous Pipelined Speculation. arXiv preprint arXiv:2407.11798 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"One-for-all: Generalized lora for parameter-efficient fine-tuning. 
arXiv preprint arXiv:2306.07967","author":"Chavan Arnav","year":"2023","unstructured":"Arnav Chavan, Zhuang Liu, Deepak Gupta, Eric Xing, and Zhiqiang Shen. 2023. One-for-all: Generalized lora for parameter-efficient fine-tuning. arXiv preprint arXiv:2306.07967 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Punica: Multi-Tenant LoRA Serving. arXiv:2310.18547 [cs.DC] https:\/\/arxiv.org\/abs\/2310.18547","author":"Chen Lequn","year":"2023","unstructured":"Lequn Chen, Zihao Ye, Yongji Wu, Danyang Zhuo, Luis Ceze, and Arvind Krishnamurthy. 2023. Punica: Multi-Tenant LoRA Serving. arXiv:2310.18547 [cs.DC] https:\/\/arxiv.org\/abs\/2310.18547"},{"key":"e_1_3_2_1_14_1","volume-title":"Longlora: Efficient fine-tuning of long-context large language models. arXiv preprint arXiv:2309.12307","author":"Chen Yukang","year":"2023","unstructured":"Yukang Chen, Shengju Qian, Haotian Tang, Xin Lai, Zhijian Liu, Song Han, and Jiaya Jia. 2023. Longlora: Efficient fine-tuning of long-context large language models. arXiv preprint arXiv:2309.12307 (2023)."},{"key":"e_1_3_2_1_15_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al. 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research 24, 240 (2023), 1\u2013113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_16_1","volume-title":"Qlora: Efficient finetuning of quantized llms. Advances in Neural Information Processing Systems 36","author":"Dettmers Tim","year":"2024","unstructured":"Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, and Luke Zettlemoyer. 2024. Qlora: Efficient finetuning of quantized llms. 
Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_17_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly Jakob Uszkoreit and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. arXiv:2010.11929 [cs.CV] https:\/\/arxiv.org\/abs\/2010.11929"},{"key":"e_1_3_2_1_18_1","unstructured":"Jane Dwivedi-Yu Timo Schick Zhengbao Jiang Maria Lomeli Patrick Lewis Gautier Izacard Edouard Grave Sebastian Riedel and Fabio Petroni. 2022. EditEval: An Instruction-Based Benchmark for Text Improvements. arXiv:2209.13331 [cs.CL] https:\/\/arxiv.org\/abs\/2209.13331"},{"key":"e_1_3_2_1_19_1","volume-title":"Automatic text summarization: A comprehensive survey. Expert systems with applications 165","author":"El-Kassas Wafaa S","year":"2021","unstructured":"Wafaa S El-Kassas, Cherif R Salama, Ahmed A Rafea, and Hoda K Mohamed. 2021. Automatic text summarization: A comprehensive survey. Expert systems with applications 165 (2021), 113679."},{"key":"e_1_3_2_1_20_1","unstructured":"Hugging Face. 2023. Text Generation Inference: Large Language Model Text Generation Inference. https:\/\/github.com\/huggingface\/text-generation-inference."},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Machine Learning. PMLR, 10323\u201310337","author":"Frantar Elias","year":"2023","unstructured":"Elias Frantar and Dan Alistarh. 2023. Sparsegpt: Massive language models can be accurately pruned in one-shot. In International Conference on Machine Learning. PMLR, 10323\u201310337."},{"key":"e_1_3_2_1_22_1","volume-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2022. 
Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323 (2022)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.10256836"},{"key":"e_1_3_2_1_24_1","unstructured":"Georgi Gerganov. 2023. llama.cpp: LLM inference in C\/C++. https:\/\/github.com\/ggerganov\/llama.cpp."},{"key":"e_1_3_2_1_25_1","unstructured":"Georgi Gerganov. 2024. ggml. https:\/\/github.com\/ggerganov\/ggml. Accessed: 2024-12-03."},{"key":"e_1_3_2_1_26_1","volume-title":"Multimodal-gpt: A vision and language model for dialogue with humans. arXiv preprint arXiv:2305.04790","author":"Gong Tao","year":"2023","unstructured":"Tao Gong, Chengqi Lyu, Shilong Zhang, Yudong Wang, Miao Zheng, Qian Zhao, Kuikun Liu, Wenwei Zhang, Ping Luo, and Kai Chen. 2023. Multimodal-gpt: A vision and language model for dialogue with humans. arXiv preprint arXiv:2305.04790 (2023)."},{"key":"e_1_3_2_1_27_1","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman and Others. 2024. The Llama 3 Herd of Models. arXiv:2407.21783 [cs.AI] https:\/\/arxiv.org\/abs\/2407.21783"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24797-2"},{"key":"e_1_3_2_1_29_1","volume-title":"InstructDial: Improving zero and few-shot generalization in dialogue through instruction tuning. arXiv preprint arXiv:2205.12673","author":"Gupta Prakhar","year":"2022","unstructured":"Prakhar Gupta, Cathy Jiao, Yi-Ting Yeh, Shikib Mehri, Maxine Eskenazi, and Jeffrey P Bigham. 2022. InstructDial: Improving zero and few-shot generalization in dialogue through instruction tuning. arXiv preprint arXiv:2205.12673 (2022)."},{"key":"e_1_3_2_1_30_1","unstructured":"Dan Hendrycks Collin Burns Saurav Kadavath Akul Arora Steven Basart Eric Tang Dawn Song and Jacob Steinhardt. 2021. Measuring Mathematical Problem Solving With the MATH Dataset. 
arXiv:2103.03874 [cs.LG] https:\/\/arxiv.org\/abs\/2103.03874"},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR, 2790\u20132799","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-efficient transfer learning for NLP. In International conference on machine learning. PMLR, 2790\u20132799."},{"key":"e_1_3_2_1_32_1","volume-title":"When the Edge Meets Transformers: Distributed Inference with Transformer Models. In 2024 IEEE 44th International Conference on Distributed Computing Systems (ICDCS). IEEE, 82\u201392","author":"Hu Chenghao","year":"2024","unstructured":"Chenghao Hu and Baochun Li. 2024. When the Edge Meets Transformers: Distributed Inference with Transformer Models. In 2024 IEEE 44th International Conference on Distributed Computing Systems (ICDCS). IEEE, 82\u201392."},{"key":"e_1_3_2_1_33_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_34_1","unstructured":"Albert Q. Jiang Alexandre Sablayrolles Antoine Roux Arthur Mensch Blanche Savary Chris Bamford Devendra Singh Chaplot Diego de las Casas Emma Bou Hanna Florian Bressand Gianna Lengyel Guillaume Bour Guillaume Lample L\u00e9lio Renard Lavaud Lucile Saulnier Marie-Anne Lachaux Pierre Stock Sandeep Subramanian Sophia Yang Szymon Antoniak Teven Le Scao Th\u00e9ophile Gervet Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William El Sayed. 2024. Mixtral of Experts. 
arXiv:2401.04088 [cs.LG] https:\/\/arxiv.org\/abs\/2401.04088"},{"key":"e_1_3_2_1_35_1","volume-title":"Lion: Adversarial distillation of proprietary large language models. arXiv preprint arXiv:2305.12870","author":"Jiang Yuxin","year":"2023","unstructured":"Yuxin Jiang, Chunkit Chan, Mingyang Chen, and Wei Wang. 2023. Lion: Adversarial distillation of proprietary large language models. arXiv preprint arXiv:2305.12870 (2023)."},{"key":"e_1_3_2_1_36_1","unstructured":"Nicolai M Josuttis. 2012. The C++ standard library: a tutorial and reference. (2012)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_39_1","volume-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24)","author":"Lee Wonbeom","year":"2024","unstructured":"Wonbeom Lee, Jungi Lee, Junghwan Seo, and Jaewoong Sim. 2024. {InfiniGen}: Efficient generative inference of large language models with dynamic {KV} cache management. In 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24). 155\u2013172."},{"key":"e_1_3_2_1_40_1","volume-title":"The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691","author":"Lester Brian","year":"2021","unstructured":"Brian Lester, Rami Al-Rfou, and Noah Constant. 2021. The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)."},{"key":"e_1_3_2_1_41_1","volume-title":"The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691","author":"Lester Brian","year":"2021","unstructured":"Brian Lester, Rami Al-Rfou, and Noah Constant. 2021. The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)."},{"key":"e_1_3_2_1_42_1","unstructured":"Suyi Li Hanfeng Lu Tianyuan Wu Minchen Yu Qizhen Weng Xusheng Chen Yizhou Shan Binhang Yuan and Wei Wang. 2024. 
CaraServe: CPUAssisted and Rank-Aware LoRA Serving for Generative LLM Inference. arXiv:2401.11240 [cs.DC] https:\/\/arxiv.org\/abs\/2401.11240"},{"key":"e_1_3_2_1_43_1","volume-title":"Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190","author":"Li Xiang Lisa","year":"2021","unstructured":"Xiang Lisa Li and Percy Liang. 2021. Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)."},{"key":"e_1_3_2_1_44_1","volume-title":"Chatdoctor: A medical chat model fine-tuned on a large language model meta-ai (llama) using medical domain knowledge. Cureus 15, 6","author":"Li Yunxiang","year":"2023","unstructured":"Yunxiang Li, Zihan Li, Kai Zhang, Ruilong Dan, Steve Jiang, and You Zhang. 2023. Chatdoctor: A medical chat model fine-tuned on a large language model meta-ai (llama) using medical domain knowledge. Cureus 15, 6 (2023)."},{"key":"e_1_3_2_1_45_1","unstructured":"Yuanchun Li Hao Wen Weijun Wang Xiangyu Li Yizhen Yuan Guohong Liu Jiacheng Liu Wenxing Xu Xiang Wang Yi Sun et al. 2024. Personal llm agents: Insights and survey about the capability efficiency and security. arXiv preprint arXiv:2401.05459 (2024)."},{"key":"e_1_3_2_1_46_1","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Li Zhuohan","year":"2023","unstructured":"Zhuohan Li, Lianmin Zheng, Yinmin Zhong, Vincent Liu, Ying Sheng, Xin Jin, Yanping Huang, Zhifeng Chen, Hao Zhang, Joseph E Gonzalez, et al. 2023. {AlpaServe}: Statistical multiplexing with model parallelism for deep learning serving. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23). 663\u2013679."},{"key":"e_1_3_2_1_47_1","volume-title":"Llmgrounded video diffusion models. arXiv preprint arXiv:2309.17444","author":"Lian Long","year":"2023","unstructured":"Long Lian, Baifeng Shi, Adam Yala, Trevor Darrell, and Boyi Li. 2023. Llmgrounded video diffusion models. 
arXiv preprint arXiv:2309.17444 (2023)."},{"key":"e_1_3_2_1_48_1","first-page":"87","article-title":"Awq: Activation-aware weight quantization for on-device llm compression and acceleration","volume":"6","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Wei-Ming Chen, Wei-Chen Wang, Guangxuan Xiao, Xingyu Dang, Chuang Gan, and Song Han. 2024. Awq: Activation-aware weight quantization for on-device llm compression and acceleration. Proceedings of Machine Learning and Systems 6 (2024), 87\u2013100.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_49_1","volume-title":"Goat: Fine-tuned llama outperforms gpt-4 on arithmetic tasks. arXiv preprint arXiv:2305.14201","author":"Liu Tiedong","year":"2023","unstructured":"Tiedong Liu and Bryan Kian Hsiang Low. 2023. Goat: Fine-tuned llama outperforms gpt-4 on arithmetic tasks. arXiv preprint arXiv:2305.14201 (2023)."},{"key":"e_1_3_2_1_50_1","volume-title":"Zhengxiao Du, Zhilin Yang, and Jie Tang.","author":"Liu Xiao","year":"2021","unstructured":"Xiao Liu, Kaixuan Ji, Yicheng Fu, Weng Lam Tam, Zhengxiao Du, Zhilin Yang, and Jie Tang. 2021. P-tuning v2: Prompt tuning can be comparable to fine-tuning universally across scales and tasks. arXiv preprint arXiv:2110.07602 (2021)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1434"},{"key":"e_1_3_2_1_53_1","volume-title":"Llm-pruner: On the structural pruning of large language models. Advances in neural information processing systems 36","author":"Ma Xinyin","year":"2023","unstructured":"Xinyin Ma, Gongfan Fang, and Xinchao Wang. 2023. Llm-pruner: On the structural pruning of large language models. Advances in neural information processing systems 36 (2023), 21702\u201321720."},{"key":"e_1_3_2_1_54_1","volume-title":"PEFT: State-of-the-art Parameter-Efficient Fine-Tuning methods. 
https:\/\/github.com\/huggingface\/peft.","author":"Mangrulkar Sourab","year":"2022","unstructured":"Sourab Mangrulkar, Sylvain Gugger, Lysandre Debut, Younes Belkada, Sayak Paul, and Benjamin Bossan. 2022. PEFT: State-of-the-art Parameter-Efficient Fine-Tuning methods. https:\/\/github.com\/huggingface\/peft."},{"key":"e_1_3_2_1_55_1","volume-title":"Qingqing Cao, Maxwell Horton, Yanzi Jin, Chenfan Sun, Iman Mirzadeh, Mahyar Najibi, Dmitry Belenko, Peter Zatloukal, and Mohammad Rastegari.","author":"Mehta Sachin","year":"2024","unstructured":"Sachin Mehta, Mohammad Hossein Sekhavat, Qingqing Cao, Maxwell Horton, Yanzi Jin, Chenfan Sun, Iman Mirzadeh, Mahyar Najibi, Dmitry Belenko, Peter Zatloukal, and Mohammad Rastegari. 2024. OpenELM: An Efficient Language Model Family with Open Training and Inference Framework. arXiv:2404.14619 [cs.CL] https:\/\/arxiv.org\/abs\/2404.14619"},{"key":"e_1_3_2_1_56_1","unstructured":"Liang Mi Weijun Wang Wenming Tu Qingfeng He Rui Kong Xinyu Fang Yazhu Dong Yikang Zhang Yunchun Li Meng Li et al. 2024. V-LoRA: An Efficient and Flexible System Boosts Vision Applications with LoRA LMM. arXiv preprint arXiv:2411.00915 (2024)."},{"key":"e_1_3_2_1_57_1","unstructured":"Microsoft. 2023. DeepSpeed-MII: DeepSpeed Model Implementations for Inference. https:\/\/github.com\/microsoft\/DeepSpeed-MII. Accessed: [Insert date of access]."},{"key":"e_1_3_2_1_58_1","unstructured":"MLC team. 2023\u20132025. MLC-LLM. https:\/\/github.com\/mlc-ai\/mlc-llm"},{"key":"e_1_3_2_1_59_1","volume-title":"Jennifer Neville, and Tara Safavi.","author":"Mysore Sheshera","year":"2023","unstructured":"Sheshera Mysore, Zhuoran Lu, Mengting Wan, Longqi Yang, Steve Menezes, Tina Baghaee, Emmanuel Barajas Gonzalez, Jennifer Neville, and Tara Safavi. 2023. Pearl: Personalizing large language model writing assistants with generation-calibrated retrievers. 
arXiv preprint arXiv:2311.09180 (2023)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","unstructured":"Ramesh Nallapati Bowen Zhou Caglar Gulcehre Bing Xiang et al. 2016. Abstractive text summarization using sequence-to-sequence rnns and beyond. arXiv preprint arXiv:1602.06023 (2016).","DOI":"10.18653\/v1\/K16-1028"},{"key":"e_1_3_2_1_61_1","unstructured":"OpenAI. 2024. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL] https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3697010"},{"key":"e_1_3_2_1_63_1","volume-title":"2011 17th IEEE Real-Time and Embedded Technology and Applications Symposium. IEEE, 280\u2013290","author":"Paolieri Marco","year":"2011","unstructured":"Marco Paolieri, Eduardo Qui\u00f1ones, Francisco J Cazorla, Robert I Davis, and Mateo Valero. 2011. IA\u02c6 3: An interference aware allocation algorithm for multicore hard real-time systems. In 2011 17th IEEE Real-Time and Embedded Technology and Applications Symposium. IEEE, 280\u2013290."},{"key":"e_1_3_2_1_64_1","unstructured":"Adam Paszke Sam Gross Soumith Chintala Gregory Chanan Edward Yang Zachary DeVito Zeming Lin Alban Desmaison Luca Antiga and Adam Lerer. 2017. Automatic differentiation in PyTorch. (2017)."},{"key":"e_1_3_2_1_65_1","volume-title":"Operating system concepts","author":"Peterson James L","unstructured":"James L Peterson and Abraham Silberschatz. 1985. Operating system concepts. Addison-Wesley Longman Publishing Co., Inc."},{"key":"e_1_3_2_1_66_1","volume-title":"Asa Cooper Stickland, Jackson Petty, Richard Yuanzhe Pang, Julien Dirani, Julian Michael, and Samuel R. Bowman.","author":"Rein David","year":"2023","unstructured":"David Rein, Betty Li Hou, Asa Cooper Stickland, Jackson Petty, Richard Yuanzhe Pang, Julien Dirani, Julian Michael, and Samuel R. Bowman. 2023. GPQA: A Graduate-Level Google-Proof Q&A Benchmark. 
arXiv:2311.12022 [cs.AI] https:\/\/arxiv.org\/abs\/2311.12022"},{"key":"e_1_3_2_1_67_1","volume-title":"Yossi Adi, Jingyu Liu, Romain Sauvestre, Tal Remez, et al.","author":"Roziere Baptiste","year":"2023","unstructured":"Baptiste Roziere, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Romain Sauvestre, Tal Remez, et al. 2023. Code llama: Open foundation models for code. arXiv preprint arXiv:2308.12950 (2023)."},{"key":"e_1_3_2_1_68_1","volume-title":"Exploiting cloze questions for few shot text classification and natural language inference. arXiv preprint arXiv:2001.07676","author":"Schick Timo","year":"2020","unstructured":"Timo Schick and Hinrich Sch\u00fctze. 2020. Exploiting cloze questions for few shot text classification and natural language inference. arXiv preprint arXiv:2001.07676 (2020)."},{"key":"e_1_3_2_1_69_1","volume-title":"2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Shen Tianxiang","year":"2022","unstructured":"Tianxiang Shen, Ji Qi, Jianyu Jiang, Xian Wang, Siyuan Wen, Xusheng Chen, Shixiong Zhao, Sen Wang, Li Chen, Xiapu Luo, Fengwei Zhang, and Heming Cui. 2022. SOTER: Guarding Black-box Inference for General Neural Networks at the Edge. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). USENIX Association, Carlsbad, CA, 723\u2013738. https:\/\/www.usenix.org\/conference\/atc22\/presentation\/shen"},{"key":"e_1_3_2_1_70_1","volume-title":"S-lora: Serving thousands of concurrent lora adapters. arXiv preprint arXiv:2311.03285","author":"Sheng Ying","year":"2023","unstructured":"Ying Sheng, Shiyi Cao, Dacheng Li, Coleman Hooper, Nicholas Lee, Shuo Yang, Christopher Chou, Banghua Zhu, Lianmin Zheng, Kurt Keutzer, et al. 2023. S-lora: Serving thousands of concurrent lora adapters. arXiv preprint arXiv:2311.03285 (2023)."},{"key":"e_1_3_2_1_71_1","volume-title":"Llasm: Large language and speech model. 
arXiv preprint arXiv:2308.15930","author":"Shu Yu","year":"2023","unstructured":"Yu Shu, Siwei Dong, Guangyao Chen, Wenhao Huang, Ruihua Zhang, Daochen Shi, Qiqi Xiang, and Yemin Shi. 2023. Llasm: Large language and speech model. arXiv preprint arXiv:2308.15930 (2023)."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3694715.3695964"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12007"},{"key":"e_1_3_2_1_74_1","volume-title":"Llumnix: Dynamic Scheduling for Large Language Model Serving. arXiv preprint arXiv:2406.03243","author":"Sun Biao","year":"2024","unstructured":"Biao Sun, Ziming Huang, Hanyu Zhao, Wencong Xiao, Xinyi Zhang, Yong Li, and Wei Lin. 2024. Llumnix: Dynamic Scheduling for Large Language Model Serving. arXiv preprint arXiv:2406.03243 (2024)."},{"key":"e_1_3_2_1_75_1","volume-title":"Aakanksha Chowdhery, Quoc V. Le, Ed H. Chi, Denny Zhou, and Jason Wei.","author":"Suzgun Mirac","year":"2022","unstructured":"Mirac Suzgun, Nathan Scales, Nathanael Sch\u00e4rli, Sebastian Gehrmann, Yi Tay, Hyung Won Chung, Aakanksha Chowdhery, Quoc V. Le, Ed H. Chi, Denny Zhou, and Jason Wei. 2022. Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them. arXiv:2210.09261 [cs.CL] https:\/\/arxiv.org\/abs\/2210.09261"},{"key":"e_1_3_2_1_76_1","unstructured":"Zhengyang Tang Xingxing Zhang Benyou Wang and Furu Wei. 2024. MathScale: Scaling Instruction Tuning for Mathematical Reasoning. arXiv:2403.02884 [cs.CL] https:\/\/arxiv.org\/abs\/2403.02884"},{"key":"e_1_3_2_1_77_1","volume-title":"Alpaca: A strong, replicable instruction-following model","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori B Hashimoto. 2023. Alpaca: A strong, replicable instruction-following model. Stanford Center for Research on Foundation Models. 
https:\/\/crfm.stanford.edu\/2023\/03\/13\/alpaca.html 3, 6 (2023), 7."},{"key":"e_1_3_2_1_78_1","volume-title":"Gemini: A Family of Highly Capable Multimodal Models. arXiv:2312.11805 [cs.CL] https:\/\/arxiv.org\/abs\/2312.11805","author":"Team Gemini","year":"2024","unstructured":"Gemini Team. 2024. Gemini: A Family of Highly Capable Multimodal Models. arXiv:2312.11805 [cs.CL] https:\/\/arxiv.org\/abs\/2312.11805"},{"key":"e_1_3_2_1_79_1","volume-title":"OpenMathInstruct-2: Accelerating AI for Math with Massive Open-Source Instruction Data. arXiv preprint arXiv:2410.01560","author":"Toshniwal Shubham","year":"2024","unstructured":"Shubham Toshniwal, Wei Du, Ivan Moshkov, Branislav Kisacanin, Alexan Ayrapetyan, and Igor Gitman. 2024. OpenMathInstruct-2: Accelerating AI for Math with Massive Open-Source Instruction Data. arXiv preprint arXiv:2410.01560 (2024)."},{"key":"e_1_3_2_1_80_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aurelien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. arXiv:2302.13971 [cs.CL] https:\/\/arxiv.org\/abs\/2302.13971"},{"key":"e_1_3_2_1_81_1","volume-title":"Attention is all you need. Advances in Neural Information Processing Systems","author":"Vaswani A","year":"2017","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_2_1_82_1","volume-title":"Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. Advances in neural information processing systems 33","author":"Wang Wenhui","year":"2020","unstructured":"Wenhui Wang, Furu Wei, Li Dong, Hangbo Bao, Nan Yang, and Ming Zhou. 2020. Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. 
Advances in neural information processing systems 33 (2020), 5776\u20135788."},{"key":"e_1_3_2_1_83_1","unstructured":"Yubo Wang Xueguang Ma Ge Zhang Yuansheng Ni Abhranil Chandra Shiguang Guo Weiming Ren Aaran Arulraj Xuan He Ziyan Jiang Tianle Li Max Ku Kai Wang Alex Zhuang Rongqi Fan Xiang Yue and Wenhu Chen. 2024. MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark. arXiv:2406.01574 [cs.CL] https:\/\/arxiv.org\/abs\/2406.01574"},{"key":"e_1_3_2_1_84_1","volume-title":"Clip-gen: Language-free training of a text-to-image generator with clip. arXiv preprint arXiv:2203.00386","author":"Wang Zihao","year":"2022","unstructured":"Zihao Wang, Wei Liu, Qian He, Xinglong Wu, and Zili Yi. 2022. Clip-gen: Language-free training of a text-to-image generator with clip. arXiv preprint arXiv:2203.00386 (2022)."},{"key":"e_1_3_2_1_85_1","unstructured":"Ziyao Wang Zheyu Shen Yexiao He Guoheng Sun Hongyi Wang Lingjuan Lyu and Ang Li. 2024. FLoRA: Federated Fine-Tuning Large Language Models with Heterogeneous Low-Rank Adaptations. arXiv:2409.05976 [cs.LG] https:\/\/arxiv.org\/abs\/2409.05976"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jksuci.2020.05.006"},{"key":"e_1_3_2_1_87_1","volume-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. Association for Computational Linguistics, Online, 38\u201345","author":"Wolf Thomas","year":"2020","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, Joe Davison, Sam Shleifer, Patrick von Platen, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. 2020. Transformers: State-of-the-Art Natural Language Processing. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. 
Association for Computational Linguistics, Online, 38\u201345. https:\/\/www.aclweb.org\/anthology\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_88_1","volume-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24)","author":"Wu Bingyang","year":"2024","unstructured":"Bingyang Wu, Ruidong Zhu, Zili Zhang, Peng Sun, Xuanzhe Liu, and Xin Jin. 2024. {dLoRA}: Dynamically Orchestrating Requests and Adapters for {LoRA} {LLM} Serving. In 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24). 911\u2013927."},{"key":"e_1_3_2_1_89_1","volume-title":"Structured pruning learns compact and accurate models. arXiv preprint arXiv:2204.00408","author":"Xia Mengzhou","year":"2022","unstructured":"Mengzhou Xia, Zexuan Zhong, and Danqi Chen. 2022. Structured pruning learns compact and accurate models. arXiv preprint arXiv:2204.00408 (2022)."},{"key":"e_1_3_2_1_90_1","volume-title":"International Conference on Machine Learning. PMLR, 38087\u201338099","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Ji Lin, Mickael Seznec, Hao Wu, Julien Demouth, and Song Han. 2023. Smoothquant: Accurate and efficient post-training quantization for large language models. In International Conference on Machine Learning. PMLR, 38087\u201338099."},{"key":"e_1_3_2_1_91_1","unstructured":"Shang Yang Junxian Guo Haotian Tang Qinghao Hu Guangxuan Xiao Jiaming Tang Yujun Lin Zhijian Liu Yao Lu and Song Han. 2025. LServe: Efficient Long-sequence LLM Serving with Unified Sparse Attention. arXiv:2502.14866 [cs.CL] https:\/\/arxiv.org\/abs\/2502.14866"},{"key":"e_1_3_2_1_92_1","volume-title":"Review networks for caption generation. Advances in neural information processing systems 29","author":"Yang Zhilin","year":"2016","unstructured":"Zhilin Yang, Ye Yuan, Yuexin Wu, William W Cohen, and Russ R Salakhutdinov. 2016. Review networks for caption generation. 
Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwae403"},{"key":"e_1_3_2_1_94_1","volume-title":"Disc-lawllm: Fine-tuning large language models for intelligent legal services. arXiv preprint arXiv:2309.11325","author":"Yue Shengbin","year":"2023","unstructured":"Shengbin Yue, Wei Chen, Siyuan Wang, Bingxuan Li, Chenchen Shen, Shujun Liu, Yuxuan Zhou, Yao Xiao, Song Yun, Xuanjing Huang, et al. 2023. Disc-lawllm: Fine-tuning large language models for intelligent legal services. arXiv preprint arXiv:2309.11325 (2023)."},{"key":"e_1_3_2_1_95_1","volume-title":"Spectr: Spectral transformer for hyperspectral pathology image segmentation. arXiv preprint arXiv:2103.03604","author":"Yun Boxiang","year":"2021","unstructured":"Boxiang Yun, Yan Wang, Jieneng Chen, Huiyu Wang, Wei Shen, and Qingli Li. 2021. Spectr: Spectral transformer for hyperspectral pathology image segmentation. arXiv preprint arXiv:2103.03604 (2021)."},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"crossref","unstructured":"Duzhen Zhang Yahan Yu Jiahua Dong Chenxing Li Dan Su Chenhui Chu and Dong Yu. 2024. MM-LLMs: Recent Advances in MultiModal Large Language Models. arXiv:2401.13601 [cs.CL] https:\/\/arxiv.org\/abs\/2401.13601","DOI":"10.18653\/v1\/2024.findings-acl.738"},{"key":"e_1_3_2_1_97_1","volume-title":"Adalora: Adaptive budget allocation for parameter-efficient fine-tuning. arXiv preprint arXiv:2303.10512","author":"Zhang Qingru","year":"2023","unstructured":"Qingru Zhang, Minshuo Chen, Alexander Bukharin, Nikos Karampatziakis, Pengcheng He, Yu Cheng, Weizhu Chen, and Tuo Zhao. 2023. Adalora: Adaptive budget allocation for parameter-efficient fine-tuning. arXiv preprint arXiv:2303.10512 (2023)."},{"key":"e_1_3_2_1_98_1","volume-title":"Multi-task instruction tuning of llama for specific scenarios: A preliminary study on writing assistance. 
arXiv preprint arXiv:2305.13225","author":"Zhang Yue","year":"2023","unstructured":"Yue Zhang, Leyang Cui, Deng Cai, Xinting Huang, Tao Fang, and Wei Bi. 2023. Multi-task instruction tuning of llama for specific scenarios: A preliminary study on writing assistance. arXiv preprint arXiv:2305.13225 (2023)."},{"key":"e_1_3_2_1_99_1","volume-title":"Shiyi Cao, Christos Kozyrakis, Ion Stoica, Joseph E. Gonzalez, Clark Barrett, and Ying Sheng.","author":"Zheng Lianmin","year":"2024","unstructured":"Lianmin Zheng, Liangsheng Yin, Zhiqiang Xie, Chuyue Sun, Jeff Huang, Cody Hao Yu, Shiyi Cao, Christos Kozyrakis, Ion Stoica, Joseph E. Gonzalez, Clark Barrett, and Ying Sheng. 2024. SGLang: Efficient Execution of Structured Language Model Programs. arXiv:2312.07104 [cs.AI] https:\/\/arxiv.org\/abs\/2312.07104"},{"key":"e_1_3_2_1_100_1","unstructured":"Jeffrey Zhou Tianjian Lu Swaroop Mishra Siddhartha Brahma Sujoy Basu Yi Luan Denny Zhou and Le Hou. 2023. Instruction-Following Evaluation for Large Language Models. 
arXiv:2311.07911 [cs.CL] https:\/\/arxiv.org\/abs\/2311.07911"}],"event":{"name":"MobiSys '25: 23rd Annual International Conference on Mobile Systems, Applications and Services","location":"Hilton Anaheim Anaheim CA USA","acronym":"MobiSys '25","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 23rd Annual International Conference on Mobile Systems, Applications and Services"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711875.3729141","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711875.3729141","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T19:32:18Z","timestamp":1759433538000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711875.3729141"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,23]]},"references-count":100,"alternative-id":["10.1145\/3711875.3729141","10.1145\/3711875"],"URL":"https:\/\/doi.org\/10.1145\/3711875.3729141","relation":{},"subject":[],"published":{"date-parts":[[2025,6,23]]},"assertion":[{"value":"2025-09-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}