{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,17]],"date-time":"2026-05-17T09:09:46Z","timestamp":1779008986127,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":110,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T00:00:00Z","timestamp":1778371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2503073"],"award-info":[{"award-number":["2503073"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2047461"],"award-info":[{"award-number":["2047461"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,11]]},"DOI":"10.1145\/3774906.3802769","type":"proceedings-article","created":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T14:20:14Z","timestamp":1778250014000},"page":"421-437","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EdgeTune: Efficient On-Device LLM Personalization at the Edge"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-8323-1809","authenticated-orcid":false,"given":"Zhenyu","family":"Wang","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of North Carolina at Chapel Hill, Chapel Hill, NC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7103-0573","authenticated-orcid":false,"given":"Rana Muhammad Shahroz","family":"Khan","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of North Carolina at Chapel Hill, Chapel Hill, NC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7774-8197","authenticated-orcid":false,"given":"Tianlong","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of North Carolina at Chapel Hill, Chapel Hill, NC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1443-1146","authenticated-orcid":false,"given":"Shahriar","family":"Nirjon","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of North Carolina at Chapel Hill, Chapel Hill, NC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,5,10]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n.d.]. 3C Battery Watcher. https:\/\/www.3c71.com\/android\/."},{"key":"e_1_3_3_2_3_2","unstructured":"[n.d.]. Huggingface Hub V1.0.https:\/\/huggingface.co\/blog\/huggingface-hub-v1"},{"key":"e_1_3_3_2_4_2","unstructured":"[n.d.]. ollama. https:\/\/docs.ollama.com\/."},{"key":"e_1_3_3_2_5_2","unstructured":"[n.d.]. TensorRT-LLM. https:\/\/docs.nvidia.com\/tensorrt-llm\/index.html."},{"key":"e_1_3_3_2_6_2","unstructured":"Marah Abdin Jyoti Aneja Harkirat Behl S\u00e9bastien Bubeck Ronen Eldan Suriya Gunasekar Michael Harrison Russell\u00a0J Hewett Mojan Javaheripi Piero Kauffmann et\u00a0al. 2024. Phi-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.08905 (2024)."},{"key":"e_1_3_3_2_7_2","unstructured":"Loubna\u00a0Ben Allal Anton Lozhkov Elie Bakouch Gabriel\u00a0Mart\u00edn Bl\u00e1zquez Guilherme Penedo Lewis Tunstall Andr\u00e9s Marafioti Hynek Kydl\u00ed\u010dek Agust\u00edn\u00a0Piqueres Lajar\u00edn Vaibhav Srivastav Joshua Lochner Caleb Fahlgren Xuan-Son Nguyen Cl\u00e9mentine Fourrier Ben Burtenshaw Hugo Larcher Haojun Zhao Cyril Zakka Mathieu Morlon Colin Raffel Leandro von Werra and Thomas Wolf. 2025. SmolLM2: When Smol Goes Big \u2013 Data-Centric Training of a Small Language Model. arxiv:https:\/\/arXiv.org\/abs\/2502.02737\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2502.02737"},{"key":"e_1_3_3_2_8_2","unstructured":"Loubna\u00a0Ben Allal Anton Lozhkov Elie Bakouch Leandro von Werra and Thomas Wolf. 2024. SmolLM - blazingly fast and remarkably powerful."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Ziyan An Xia Wang Hendrik Baier Zirong Chen Abhishek Dubey Taylor\u00a0T Johnson Jonathan Sprinkle Ayan Mukhopadhyay and Meiyi Ma. 2025. Combining llms with logic-based framework to explain mcts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.00610 (2025).","DOI":"10.65109\/JWWJ9232"},{"key":"e_1_3_3_2_10_2","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang et\u00a0al. 2023. Qwen technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.16609 (2023)."},{"key":"e_1_3_3_2_11_2","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et\u00a0al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.13923 (2025)."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-51461-6_3"},{"key":"e_1_3_3_2_13_2","unstructured":"Junyi Chen Chuheng Du Renyuan Liu Shuochao Yao Dingtian Yan Jiang Liao Shengzhong Liu Fan Wu and Guihai Chen. 2025. TokenFlow: Responsive LLM Text Streaming Serving under Request Burst via Preemptive Scheduling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2510.02758 (2025)."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i22.34537"},{"key":"e_1_3_3_2_15_2","volume-title":"NAACL","author":"Clark Christopher","year":"2019","unstructured":"Christopher Clark, Kenton Lee, Ming-Wei Chang, Tom Kwiatkowski, Michael Collins, and Kristina Toutanova. 2019. BoolQ: Exploring the Surprising Difficulty of Natural Yes\/No Questions. In NAACL."},{"key":"e_1_3_3_2_16_2","volume-title":"Free Dolly: Introducing the World\u2019s First Truly Open Instruction-Tuned LLM","author":"Conover Mike","year":"2023","unstructured":"Mike Conover, Matt Hayes, Ankit Mathur, Jianwei Xie, Jun Wan, Sam Shah, Ali Ghodsi, Patrick Wendell, Matei Zaharia, and Reynold Xin. 2023. Free Dolly: Introducing the World\u2019s First Truly Open Instruction-Tuned LLM. https:\/\/www.databricks.com\/blog\/2023\/04\/12\/dolly-first-open-commercially-viable-instruction-tuned-llm"},{"key":"e_1_3_3_2_17_2","first-page":"177","volume-title":"Machine learning challenges workshop","author":"Dagan Ido","year":"2005","unstructured":"Ido Dagan, Oren Glickman, and Bernardo Magnini. 2005. The pascal recognising textual entailment challenge. In Machine learning challenges workshop. Springer, 177\u2013190."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Tri Dao Dan Fu Stefano Ermon Atri Rudra and Christopher R\u00e9. 2022. Flashattention: Fast and memory-efficient exact attention with io-awareness. Advances in neural information processing systems 35 (2022) 16344\u201316359.","DOI":"10.52202\/068431-1189"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Tim Dettmers Artidoro Pagnoni Ari Holtzman and Luke Zettlemoyer. 2023. Qlora: Efficient finetuning of quantized llms. Advances in neural information processing systems 36 (2023) 10088\u201310115.","DOI":"10.52202\/075280-0441"},{"key":"e_1_3_3_2_20_2","volume-title":"Proceedings of the third international workshop on paraphrasing (IWP2005)","author":"Dolan William\u00a0B","year":"2005","unstructured":"William\u00a0B Dolan and Chris Brockett. 2005. Automatically constructing a corpus of sentential paraphrases. In Proceedings of the third international workshop on paraphrasing (IWP2005)."},{"key":"e_1_3_3_2_21_2","unstructured":"William Fleshman and Benjamin Van\u00a0Durme. 2024. RE-Adapt: Reverse Engineered Adaptation of Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.15007 (2024)."},{"key":"e_1_3_3_2_22_2","unstructured":"Elias Frantar Saleh Ashkboos Torsten Hoefler and Dan Alistarh. 2022. Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.17323 (2022)."},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3715014.3722062"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Heming Fu Weici Pan Liangkai Zhou Zeyu Zhang Zhenhua Liu and Shan Lin. 2025. MALLM: Multi-Agent Decision-Making with LLMs for Multi-User Edge-Sensor Environments. ACM SIGMETRICS Performance Evaluation Review 53 2 (2025) 3\u20138.","DOI":"10.1145\/3764944.3764947"},{"key":"e_1_3_3_2_25_2","unstructured":"Jiaxiang Geng Lunyu Zhao Yiyi Lu and Bing Luo. 2025. MobileFineTuner: A Unified End-to-End Framework for Fine-Tuning LLMs on Mobile Phones. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2512.08211 (2025)."},{"key":"e_1_3_3_2_26_2","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan et\u00a0al. 2024. The llama 3 herd of models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.21783 (2024)."},{"key":"e_1_3_3_2_27_2","unstructured":"Naibin Gu Peng Fu Xiyu Liu Ke Ma Zheng Lin and Weiping Wang. 2025. Adapt Once Thrive with Updates: Transferable Parameter-Efficient Fine-Tuning on Evolving Base Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2506.06844 (2025)."},{"key":"e_1_3_3_2_28_2","unstructured":"Simeng Han Howard Dai Stephen Xia Grant Zhang Chen Liu Lichang Chen Hoang\u00a0Huy Nguyen Hongyuan Mei Jiayuan Mao and R\u00a0Thomas McCoy. 2025. Creativity or brute force? using brainteasers as a window into the problem-solving abilities of large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.10844 (2025)."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"e_1_3_3_2_30_2","volume-title":"International Conference on Learning Representations","author":"Hu Edward\u00a0J","year":"2022","unstructured":"Edward\u00a0J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3715014.3722067"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2025\/900"},{"key":"e_1_3_3_2_33_2","unstructured":"Shih-Cheng Huang Pin-Zu Li Yu-Chi Hsu Kuang-Ming Chen Yu\u00a0Tung Lin Shih-Kai Hsiao Richard Tzong-Han Tsai and Hung-yi Lee. 2023. Chat vector: A simple approach to equip llms with instruction following and model alignment in new languages. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.04799 (2023)."},{"key":"e_1_3_3_2_34_2","unstructured":"Yuting Huang Leilei Ding Zhipeng Tang Tianfu Wang Xinrui Lin Wuyang Zhang Mingxiao Ma and Yanyong Zhang. 2025. A Framework for Benchmarking and Aligning Task-Planning Safety in LLM-Based Embodied Agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.14650 (2025)."},{"key":"e_1_3_3_2_35_2","unstructured":"Nasir Hussain Haohan Chen Chanh Tran Philip Huang Zhuohao Li Pravir Chugh William Chen Ashish Kundu and Yuan Tian. 2025. VulBinLLM: LLM-powered Vulnerability Detection for Stripped Binaries. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.22010 (2025)."},{"key":"e_1_3_3_2_36_2","unstructured":"Gabriel Ilharco Marco\u00a0Tulio Ribeiro Mitchell Wortsman Suchin Gururangan Ludwig Schmidt Hannaneh Hajishirzi and Ali Farhadi. 2022. Editing models with task arithmetic. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.04089 (2022)."},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3715014.3724073"},{"key":"e_1_3_3_2_38_2","unstructured":"Albert\u00a0Q Jiang Alexandre Sablayrolles Antoine Roux Arthur Mensch Blanche Savary Chris Bamford Devendra\u00a0Singh Chaplot Diego de\u00a0las Casas Emma\u00a0Bou Hanna Florian Bressand et\u00a0al. 2024. Mixtral of experts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.04088 (2024)."},{"key":"e_1_3_3_2_39_2","unstructured":"Dongfu Jiang Xiang Ren and Bill\u00a0Yuchen Lin. 2023. Llm-blender: Ensembling large language models with pairwise ranking and generative fusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.02561 (2023)."},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Yiping Kang Johann Hauswald Cao Gao Austin Rovinski Trevor Mudge Jason Mars and Lingjia Tang. 2017. Neurosurgeon: Collaborative intelligence between the cloud and mobile edge. ACM SIGARCH Computer Architecture News 45 1 (2017) 615\u2013629.","DOI":"10.1145\/3093337.3037698"},{"key":"e_1_3_3_2_41_2","unstructured":"Namhoon Lee Thalaiyasingam Ajanthan and Philip\u00a0HS Torr. 2018. Snip: Single-shot network pruning based on connection sensitivity. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.02340 (2018)."},{"key":"e_1_3_3_2_42_2","unstructured":"Sungmin Lee Minju Kang Joonhee Lee Seungyong Lee Dongju Kim Jingi Hong Jun Shin Pei Zhang and JeongGil Ko. 2025. LLM-based Question-Answer Framework for Sensor-driven HVAC System Interaction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.04748 (2025)."},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"crossref","unstructured":"Brian Lester Rami Al-Rfou and Noah Constant. 2021. The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2104.08691 (2021).","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"e_1_3_3_2_44_2","unstructured":"Patrick Lewis Ethan Perez Aleksandra Piktus Fabio Petroni Vladimir Karpukhin Naman Goyal Heinrich K\u00fcttler Mike Lewis Wen-tau Yih Tim Rockt\u00e4schel et\u00a0al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in neural information processing systems 33 (2020) 9459\u20139474."},{"key":"e_1_3_3_2_45_2","unstructured":"Hao Li Asim Kadav Igor Durdanovic Hanan Samet and Hans\u00a0Peter Graf. 2016. Pruning filters for efficient convnets. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1608.08710 (2016)."},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3737902.3768351"},{"key":"e_1_3_3_2_47_2","unstructured":"Shuyue\u00a0Stella Li Avinandan Bose Faeze Brahman Simon\u00a0Shaolei Du Pang\u00a0Wei Koh Maryam Fazel and Yulia Tsvetkov. 2025. Personalized Reasoning: Just-In-Time Personalization and Why LLMs Fail At It. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2510.00177 (2025)."},{"key":"e_1_3_3_2_48_2","unstructured":"Xiang\u00a0Lisa Li and Percy Liang. 2021. Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2101.00190 (2021)."},{"key":"e_1_3_3_2_49_2","unstructured":"Wing Lian Bleys Goodson Eugene Pentland Austin Cook Chanvichet Vong and \"Teknium\". 2023. OpenOrca: An Open Dataset of GPT Augmented FLAN Reasoning Traces. https:\/\/huggingface.co\/datasets\/Open-Orca\/OpenOrca."},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3711875.3734376"},{"key":"e_1_3_3_2_51_2","unstructured":"Ji Lin Jiaming Tang Haotian Tang Shang Yang Wei-Ming Chen Wei-Chen Wang Guangxuan Xiao Xingyu Dang Chuang Gan and Song Han. 2024. Awq: Activation-aware weight quantization for on-device llm compression and acceleration. Proceedings of machine learning and systems 6 (2024) 87\u2013100."},{"key":"e_1_3_3_2_52_2","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et\u00a0al. 2024. Deepseek-v3 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.19437 (2024)."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"crossref","unstructured":"Akide Liu Jing Liu Zizheng Pan Yefei He Gholamreza Haffari and Bohan Zhuang. 2024. Minicache: Kv cache compression in depth dimension for large language models. Advances in Neural Information Processing Systems 37 (2024) 139997\u2013140031.","DOI":"10.52202\/079017-4443"},{"key":"e_1_3_3_2_54_2","unstructured":"Che Liu Zhongwei Wan Yuqi Wang Hui Shen Haozhe Wang Kangyu Zheng Mi Zhang and Rossella Arcucci. 2024. Benchmarking and boosting radiology report generation for 3D high-resolution medical images. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.07146 (2024)."},{"key":"e_1_3_3_2_55_2","volume-title":"Forty-first International Conference on Machine Learning","author":"Liu Shih-Yang","year":"2024","unstructured":"Shih-Yang Liu, Chien-Yi Wang, Hongxu Yin, Pavlo Molchanov, Yu-Chiang\u00a0Frank Wang, Kwang-Ting Cheng, and Min-Hung Chen. 2024. Dora: Weight-decomposed low-rank adaptation. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"crossref","unstructured":"Xinyin Ma Gongfan Fang and Xinchao Wang. 2023. Llm-pruner: On the structural pruning of large language models. Advances in neural information processing systems 36 (2023) 21702\u201321720.","DOI":"10.52202\/075280-0950"},{"key":"e_1_3_3_2_57_2","unstructured":"Fanhang Man Huandong Wang Jianjie Fang Zhaoyi Deng Baining Zhao Xinlei Chen and Yong Li. 2025. Context-Aware Sentiment Forecasting via LLM-based Multi-Perspective Role-Playing Agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.24331 (2025)."},{"key":"e_1_3_3_2_58_2","unstructured":"Bill Marino Rosco Hunter Christoph Schnabl Zubair Jamali Marinos\u00a0Emmanouil Kalpakos Mudra Kashyap Isaiah Hinton Alexa Hanson Maahum Nazir Felix Steffek et\u00a0al. 2025. AIReg-Bench: Benchmarking language models that assess AI regulation compliance. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2510.01474 (2025)."},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"publisher","DOI":"10.1145\/3672202.3673752"},{"key":"e_1_3_3_2_60_2","unstructured":"Paulius Micikevicius Sharan Narang Jonah Alben Gregory Diamos Erich Elsen David Garcia Boris Ginsburg Michael Houston Oleksii Kuchaiev Ganesh Venkatesh et\u00a0al. 2017. Mixed precision training. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1710.03740 (2017)."},{"key":"e_1_3_3_2_61_2","unstructured":"Ayushi Mishra Yang Bai Priyadarshan Narayanasamy Nakul Garg and Nirupam Roy. 2025. Spatial audio processing with large language model on wearable devices. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.08907 (2025)."},{"key":"e_1_3_3_2_62_2","unstructured":"MLC team. 2023-2025. MLC-LLM. https:\/\/github.com\/mlc-ai\/mlc-llm."},{"key":"e_1_3_3_2_63_2","first-page":"269","volume-title":"International Conference on Computer-Human Interaction Research and Applications","author":"Muralidhar Deepa","year":"2024","unstructured":"Deepa Muralidhar, Rafik Belloum, Kathia\u00a0Mar\u00e7al de Oliveira, Ashwin Ashok, and Pardaz\u00a0Banu Mohammad. 2024. The Effect of Progressive Disclosure in the Transparency of Large Language Models. In International Conference on Computer-Human Interaction Research and Applications. Springer, 269\u2013288."},{"key":"e_1_3_3_2_64_2","first-page":"189","volume-title":"International Conference on Applications of Natural Language to Information Systems","author":"Niimi Junichiro","year":"2025","unstructured":"Junichiro Niimi. 2025. A simple ensemble strategy for llm inference: Towards more stable text classification. In International Conference on Applications of Natural Language to Information Systems. Springer, 189\u2013199."},{"key":"e_1_3_3_2_65_2","unstructured":"Erik Nijkamp Bo Pang Hiroaki Hayashi Lifu Tu Huan Wang Yingbo Zhou Silvio Savarese and Caiming Xiong. 2022. Codegen: An open large language model for code with multi-turn program synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.13474 (2022)."},{"key":"e_1_3_3_2_66_2","volume-title":"Tegrastats Utility","year":"2025","unstructured":"NVIDIA. 2025. Tegrastats Utility. NVIDIA. NVIDIA documentation."},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"publisher","DOI":"10.1109\/SenSys-ML62579.2024.00007"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"crossref","unstructured":"Dan Peng Zhihui Fu and Jun Wang. 2024. Pocketllm: Enabling on-device fine-tuning for personalized llms. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.01031 (2024).","DOI":"10.18653\/v1\/2024.privatenlp-1.10"},{"key":"e_1_3_3_2_69_2","unstructured":"Hung\u00a0Manh Pham Jialu Tang Aaqib Saeed and Dong Ma. 2025. Q-Heart: ECG Question Answering via Knowledge-Informed Multimodal LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.06296 (2025)."},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/3708468.3711892"},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"crossref","unstructured":"Yujia Qin Cheng Qian Xu Han Yankai Lin Huadong Wang Ruobing Xie Zhiyuan Liu Maosong Sun and Jie Zhou. 2023. Recyclable tuning for continual pre-training. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.08702 (2023).","DOI":"10.18653\/v1\/2023.findings-acl.723"},{"key":"e_1_3_3_2_72_2","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et\u00a0al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746252.3761510"},{"key":"e_1_3_3_2_74_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICICNIS64247.2024.10823332"},{"key":"e_1_3_3_2_75_2","unstructured":"Isaac Rehg. 2024. Kv-compress: Paged kv-cache compression with variable compression rates per attention head. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.00161 (2024)."},{"key":"e_1_3_3_2_76_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.481"},{"key":"e_1_3_3_2_77_2","unstructured":"Maria Rigaki Carlos Catania and Sebastian Garcia. 2024. Hackphyr: A local fine-tuned LLM agent for network security environments. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.11276 (2024)."},{"key":"e_1_3_3_2_78_2","doi-asserted-by":"publisher","DOI":"10.1109\/SoutheastCon56624.2025.10971638"},{"key":"e_1_3_3_2_79_2","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Shahroz Rana","year":"2025","unstructured":"Rana Shahroz, Pingzhi Li, Sukwon Yun, Zhenyu Wang, Shahriar Nirjon, Chau-Wai Wong, and Tianlong Chen. 2025. PortLLM: Personalizing Evolving Large Language Models with Training-Free and Portable Model Patches. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=gyHoR6uFhU"},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"crossref","unstructured":"Nikumbh\u00a0Sarthak Sham Sandip Chakraborty and Shamik Sural. 2025. Generation of Optimized Solidity Code for Machine Learning Models using LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.06203 (2025).","DOI":"10.1109\/ICBC64466.2025.11114660"},{"key":"e_1_3_3_2_81_2","unstructured":"Joykirat Singh Akshay Nambi and Vibhav Vineet. 2024. Exposing the Achilles\u2019 Heel: Evaluating LLMs Ability to Handle Mistakes in Mathematical Reasoning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.10834 (2024)."},{"key":"e_1_3_3_2_82_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D13-1170"},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","DOI":"10.1145\/3722570.3726898"},{"key":"e_1_3_3_2_84_2","doi-asserted-by":"publisher","DOI":"10.1145\/3666025.3699374"},{"key":"e_1_3_3_2_85_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3755789"},{"key":"e_1_3_3_2_86_2","doi-asserted-by":"publisher","DOI":"10.1145\/3675094.3677588"},{"key":"e_1_3_3_2_87_2","unstructured":"Rohan Taori Ishaan Gulrajani Tianyi Zhang Yann Dubois Xuechen Li Carlos Guestrin Percy Liang and Tatsunori\u00a0B. Hashimoto. 2023. Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca."},{"key":"e_1_3_3_2_88_2","unstructured":"Gemma Team Aishwarya Kamath Johan Ferret Shreya Pathak Nino Vieillard Ramona Merhej Sarah Perrin et\u00a0al. 2025. Gemma 3 Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2503.19786\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2503.19786"},{"key":"e_1_3_3_2_89_2","unstructured":"Gemma Team Thomas Mesnard Cassidy Hardin Robert Dadashi Surya Bhupatiraju Shreya Pathak Laurent Sifre Morgane Rivi\u00e8re Mihir\u00a0Sanjay Kale Juliette Love et\u00a0al. 2024. Gemma: Open models based on gemini research and technology. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.08295 (2024)."},{"key":"e_1_3_3_2_90_2","unstructured":"Qwen Team. 2024. Qwen2.5: A Party of Foundation Models. https:\/\/qwenlm.github.io\/blog\/qwen2.5\/"},{"key":"e_1_3_3_2_91_2","unstructured":"Selim\u00a0Furkan Tekin Fatih Ilhan Tiansheng Huang Sihao Hu and Ling Liu. 2024. Llm-topla: Efficient llm ensemble by maximising diversity. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.03953 (2024)."},{"key":"e_1_3_3_2_92_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_2_93_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_94_2","doi-asserted-by":"publisher","DOI":"10.1145\/3711875.3729132"},{"key":"e_1_3_3_2_95_2","unstructured":"Yinzhou Wang Yimeng Wang Ye Xiao Liabette Escamilla Bianca Augustine Kelly Crace Gang Zhou and Yixuan Zhang. 2025. Evaluating an LLM-Powered Chatbot for Cognitive Restructuring: Insights from Mental Health Professionals. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.15599 (2025)."},{"key":"e_1_3_3_2_96_2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM55648.2025.11044645"},{"key":"e_1_3_3_2_97_2","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671897"},{"key":"e_1_3_3_2_98_2","first-page":"38087","volume-title":"International conference on machine learning","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Ji Lin, Mickael Seznec, Hao Wu, Julien Demouth, and Song Han. 2023. Smoothquant: Accurate and efficient post-training quantization for large language models. In International conference on machine learning. PMLR, 38087\u201338099."},{"key":"e_1_3_3_2_99_2","doi-asserted-by":"publisher","DOI":"10.1109\/VRW66409.2025.00464"},{"key":"e_1_3_3_2_100_2","doi-asserted-by":"publisher","DOI":"10.1145\/3722565.3727195"},{"key":"e_1_3_3_2_101_2","unstructured":"Lingling Xu Haoran Xie S\u00a0Joe Qin Xiaohui Tao and Fu\u00a0Lee Wang. 2026. Parameter-efficient fine-tuning methods for pretrained language models: A critical review and assessment. IEEE Transactions on Pattern Analysis and Machine Intelligence (2026)."},{"key":"e_1_3_3_2_102_2","doi-asserted-by":"publisher","DOI":"10.1145\/3666025.3699396"},{"key":"e_1_3_3_2_103_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.1"},{"key":"e_1_3_3_2_104_2","unstructured":"Peiyuan Zhang Guangtao Zeng Tianduo Wang and Wei Lu. 2024. TinyLlama: An Open-Source Small Language Model. arxiv:https:\/\/arXiv.org\/abs\/2401.02385\u00a0[cs.CL]"},{"key":"e_1_3_3_2_105_2","unstructured":"Qingru Zhang Minshuo Chen Alexander Bukharin Nikos Karampatziakis Pengcheng He Yu Cheng Weizhu Chen and Tuo Zhao. 2023. Adalora: Adaptive budget allocation for parameter-efficient fine-tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.10512 (2023)."},{"key":"e_1_3_3_2_106_2","doi-asserted-by":"publisher","DOI":"10.1145\/3675094.3677545"},{"key":"e_1_3_3_2_107_2","unstructured":"Susan Zhang Stephen Roller Naman Goyal Mikel Artetxe Moya Chen Shuohui Chen Christopher Dewan Mona Diab Xian Li Xi\u00a0Victoria Lin Todor Mihaylov Myle Ott Sam Shleifer Kurt Shuster Daniel Simig Punit\u00a0Singh Koura Anjali Sridhar Tianlu Wang and Luke Zettlemoyer. 2022. OPT: Open Pre-trained Transformer Language Models. arxiv:https:\/\/arXiv.org\/abs\/2205.01068\u00a0[cs.CL]"},{"key":"e_1_3_3_2_108_2","doi-asserted-by":"crossref","unstructured":"Xuechen Zhang Zijian Huang Ege\u00a0Onur Taga Carlee Joe-Wong Samet Oymak and Jiasi Chen. 2024. Efficient Contextual LLM Cascades through Budget-Constrained Policy Learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.13082 (2024).","DOI":"10.52202\/079017-2910"},{"key":"e_1_3_3_2_109_2","unstructured":"Zhehao Zhang Ryan\u00a0A Rossi Branislav Kveton Yijia Shao Diyi Yang Hamed Zamani Franck Dernoncourt Joe Barrow Tong Yu Sungchul Kim et\u00a0al. 2024. Personalization of large language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.00027 (2024)."},{"key":"e_1_3_3_2_110_2","first-page":"5530","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics","author":"Zhou Hongyun","year":"2025","unstructured":"Hongyun Zhou, Xiangyu Lu, Wang Xu, Conghui Zhu, Tiejun Zhao, and Muyun Yang. 2025. LoRA-drop: Efficient LoRA Parameter Pruning based on Output Evaluation. In Proceedings of the 31st International Conference on Computational Linguistics, Owen Rambow, Leo Wanner, Marianna Apidianaki, Hend Al-Khalifa, Barbara\u00a0Di Eugenio, and Steven Schockaert (Eds.). Association for Computational Linguistics, Abu Dhabi, UAE, 5530\u20135543. https:\/\/aclanthology.org\/2025.coling-main.371\/"},{"key":"e_1_3_3_2_111_2","unstructured":"Thomas\u00a0P Zollo Andrew Wei\u00a0Tung Siah Naimeng Ye Ang Li and Hongseok Namkoong. 2024. Personalllm: Tailoring llms to individual preferences. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.20296 (2024)."}],"event":{"name":"SenSys '26: ACM\/IEEE International Conference on Embedded Artificial Intelligence and Sensing Systems","location":"Saint Malo France","acronym":"SenSys '26","sponsor":["SIGBED ACM Special Interest Group on Embedded Systems","SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","IEEE CS"]},"container-title":["Proceedings of the 2026 ACM\/IEEE International Conference on Embedded Artificial Intelligence and Sensing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3774906.3802769","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774906.3802769","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774906.3802769","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,17]],"date-time":"2026-05-17T08:34:40Z","timestamp":1779006880000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774906.3802769"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,10]]},"references-count":110,"alternative-id":["10.1145\/3774906.3802769","10.1145\/3774906"],"URL":"https:\/\/doi.org\/10.1145\/3774906.3802769","relation":{},"subject":[],"published":{"date-parts":[[2026,5,10]]},"assertion":[{"value":"2026-05-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}