{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T13:00:17Z","timestamp":1780664417559,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":99,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,4,26]],"date-time":"2026-04-26T00:00:00Z","timestamp":1777161600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3767295.3769334","type":"proceedings-article","created":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T20:20:04Z","timestamp":1777062004000},"page":"657-674","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["TZ-LLM: Protecting On-Device Large Language Models with Arm TrustZone"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9704-800X","authenticated-orcid":false,"given":"Xunjie","family":"Wang","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2725-8955","authenticated-orcid":false,"given":"Jiacheng","family":"Shi","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0916-0913","authenticated-orcid":false,"given":"Zihan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5904-5115","authenticated-orcid":false,"given":"Yang","family":"Yu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2211-9120","authenticated-orcid":false,"given":"Zhichao","family":"Hua","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8112-8481","authenticated-orcid":false,"given":"Jinyu","family":"Gu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,4,26]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"A deep dive into CMA. https:\/\/lwn.net\/Articles\/486301\/."},{"key":"e_1_3_2_1_2_1","unstructured":"About the TZC-400. https:\/\/developer.arm.com\/documentation\/ddi0504\/c\/introduction\/about-the-tzc-400."},{"key":"e_1_3_2_1_3_1","unstructured":"AMD Secure Encrypted Virtualization (SEV). https:\/\/www.amd.com\/en\/developer\/sev.html."},{"key":"e_1_3_2_1_4_1","unstructured":"Apple Intelligence. https:\/\/www.apple.com\/apple-intelligence\/."},{"key":"e_1_3_2_1_5_1","unstructured":"Chatbot Arena. https:\/\/lmarena.ai."},{"key":"e_1_3_2_1_6_1","unstructured":"Galaxy AI. https:\/\/www.samsung.com\/us\/galaxy-ai\/."},{"key":"e_1_3_2_1_7_1","unstructured":"Geekbench. https:\/\/www.geekbench.com."},{"key":"e_1_3_2_1_8_1","unstructured":"HarmonyOS. https:\/\/www.harmonyos.com\/en\/."},{"key":"e_1_3_2_1_9_1","unstructured":"Intel Trust Domain Extensions (Intel TDX). https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/trust-domain-extensions\/overview.html."},{"key":"e_1_3_2_1_10_1","unstructured":"Introducing Arm Confidential Compute Architecture. https:\/\/developer.arm.com\/documentation\/den0125\/400\/Overview."},{"key":"e_1_3_2_1_11_1","unstructured":"K-Quants. https:\/\/github.com\/ggml-org\/llama.cpp\/pull\/1684."},{"key":"e_1_3_2_1_12_1","unstructured":"Llama 3. https:\/\/github.com\/meta-llama\/llama3\/blob\/main\/MODEL_CARD.md."},{"key":"e_1_3_2_1_13_1","unstructured":"LLM inference in C\/C++. https:\/\/github.com\/ggerganov\/llama.cpp."},{"key":"e_1_3_2_1_14_1","unstructured":"NVIDIA Confidential Computing. https:\/\/images.nvidia.cn\/aem-dam\/en-zz\/Solutions\/data-center\/HCC-Whitepaper-v1.0.pdf."},{"key":"e_1_3_2_1_15_1","unstructured":"OP-TEE Documentation. https:\/\/optee.readthedocs.io\/en\/latest\/index.html."},{"key":"e_1_3_2_1_16_1","unstructured":"OpenHarmony. https:\/\/gitee.com\/openharmony."},{"key":"e_1_3_2_1_17_1","unstructured":"Orange Pi 5 Plus (32GB). http:\/\/www.orangepi.org\/html\/hardWare\/computerAndMicrocontrollers\/details\/Orange-Pi-5-plus-32GB.html."},{"key":"e_1_3_2_1_18_1","unstructured":"QTI MSM NPU driver. https:\/\/android.googlesource.com\/kernel\/msm\/+\/60319ac47b3d30c81413fb0ebb9a21085a9a0be0\/drivers\/media\/platform\/msm\/npu\/."},{"key":"e_1_3_2_1_19_1","unstructured":"Qualcomm AI Hub. https:\/\/aihub.qualcomm.com\/mobile\/models."},{"key":"e_1_3_2_1_20_1","unstructured":"Qualcomm Hexagon NPU. https:\/\/www.qualcomm.com\/products\/technology\/processors\/hexagon."},{"key":"e_1_3_2_1_21_1","unstructured":"RK3588. https:\/\/www.rock-chips.com\/a\/en\/products\/RK35_Series\/2022\/0926\/1660.html."},{"key":"e_1_3_2_1_22_1","unstructured":"rknpu-driver. https:\/\/github.com\/airockchip\/rknn-llm\/tree\/main\/rknpu-driver."},{"key":"e_1_3_2_1_23_1","unstructured":"stress-ng (stress next generation). https:\/\/github.com\/ColinIanKing\/stress-ng."},{"key":"e_1_3_2_1_24_1","unstructured":"TLS\/SSL and crypto library. https:\/\/github.com\/openssl\/openssl."},{"key":"e_1_3_2_1_25_1","unstructured":"TrustZone for Cortex-A. https:\/\/www.arm.com\/technologies\/trustzone-for-cortex-a."},{"key":"e_1_3_2_1_26_1","unstructured":"Voice Assistant Celia - HUAWEI Global. https:\/\/consumer.huawei.com\/en\/emui\/celia\/."},{"key":"e_1_3_2_1_27_1","unstructured":"YOLOv5: A state-of-the-art real-time object detection system. https:\/\/docs.ultralytics.com."},{"key":"e_1_3_2_1_28_1","unstructured":"Marah I Abdin Sam Ade Jacobs Ammar Ahmad Awan Jyoti Aneja Ahmed Awadallah Hany Awadalla Nguyen Bach Amit Bahree Arash Bakhtiari Harkirat S. Behl Alon Benhaim Misha Bilenko Johan Bjorck S\u00e9bastien Bubeck Martin Cai Caio C\u00e9sar Teodoro Mendes Weizhu Chen Vishrav Chaudhary Parul Chopra Allie Del Giorno Gustavo de Rosa Matthew Dixon Ronen Eldan Dan Iter Amit Garg Abhishek Goswami Suriya Gunasekar Emman Haider Junheng Hao Russell J. Hewett Jamie Huynh Mojan Javaheripi Xin Jin Piero Kauffmann Nikos Karampatziakis Dongwoo Kim Mahoud Khademi Lev Kurilenko James R. Lee Yin Tat Lee Yuanzhi Li Chen Liang Weishung Liu Eric Lin Zeqi Lin Piyush Madan Arindam Mitra Hardik Modi Anh Nguyen Brandon Norick Barun Patra Daniel Perez-Becker Thomas Portet Reid Pryzant Heyang Qin Marko Radmilac Corby Rosset Sambudha Roy Olatunji Ruwase Olli Saarikivi Amin Saied Adil Salim Michael Santacroce Shital Shah Ning Shang Hiteshi Sharma Xia Song Masahiro Tanaka Xin Wang Rachel Ward Guanhua Wang Philipp Witte Michael Wyatt Can Xu Jiahang Xu Sonali Yadav Fan Yang Ziyi Yang Donghan Yu Chengruidong Zhang Cyril Zhang Jianwen Zhang Li Lyna Zhang Yi Zhang Yue Zhang Yunan Zhang and Xiren Zhou. Phi-3 technical report: A highly capable language model locally on your phone. CoRR abs\/2404.14219 2024."},{"key":"e_1_3_2_1_29_1","first-page":"12584","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), ACL 2024","author":"Alizadeh Keivan","year":"2024","unstructured":"Keivan Alizadeh, Seyed-Iman Mirzadeh, Dmitry Belenko, S. Khatamifard, Minsik Cho, Carlo C. del Mundo, Mohammad Rastegari, and Mehrdad Farajtabar. LLM in a flash: Efficient large language model inference with limited memory. In Lun-Wei Ku, Andre Martins, and Vivek Srikumar, editors, Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), ACL 2024, Bangkok, Thailand, August 11\u201316, 2024, pages 12562\u201312584. Association for Computational Linguistics, 2024."},{"key":"e_1_3_2_1_30_1","first-page":"1090","volume-title":"30th USENIX Security Symposium, USENIX Security 2021","author":"Bahmani Raad","year":"2021","unstructured":"Raad Bahmani, Ferdinand Brasser, Ghada Dessouky, Patrick Jauernig, Matthias Klimmek, Ahmad-Reza Sadeghi, and Emmanuel Stapf. CURE: A security architecture with customizable and resilient enclaves. In Michael D. Bailey and Rachel Greenstadt, editors, 30th USENIX Security Symposium, USENIX Security 2021, August 11\u201313, 2021, pages 1073\u20131090. USENIX Association, 2021."},{"key":"e_1_3_2_1_31_1","first-page":"768","volume-title":"Proceedings of the 2023 USENIX Annual Technical Conference, USENIX ATC 2023","author":"Bergman Shai","year":"2023","unstructured":"Shai Bergman, Mark Silberstein, Takahiro Shinagawa, Peter R. Pietzuch, and Llu\u00eds Vilanova. Translation pass-through for near-native paging performance in vms. In Julia Lawall and Dan Williams, editors, Proceedings of the 2023 USENIX Annual Technical Conference, USENIX ATC 2023, Boston, MA, USA, July 10\u201312, 2023, pages 753\u2013768. USENIX Association, 2023."},{"key":"e_1_3_2_1_32_1","first-page":"874","volume-title":"25th USENIX Security Symposium (USENIX Security 16)","author":"Costan Victor","year":"2016","unstructured":"Victor Costan, Ilia Lebedev, and Srinivas Devadas. Sanctum: Minimal hardware extensions for strong software isolation. In 25th USENIX Security Symposium (USENIX Security 16), pages 857\u2013874, 2016."},{"key":"e_1_3_2_1_33_1","first-page":"783","volume-title":"Proceedings of the 2022 ACM SIGSAC Conference on Computer and Communications Security, CCS 2022","author":"Deng Yunjie","year":"2022","unstructured":"Yunjie Deng, Chenxu Wang, Shunchang Yu, Shiqing Liu, Zhenyu Ning, Kevin Leach, Jin Li, Shoumeng Yan, Zhengyu He, Jiannong Cao, and Fengwei Zhang. Strongbox: A GPU TEE on arm endpoints. In Heng Yin, Angelos Stavrou, Cas Cremers, and Elaine Shi, editors, Proceedings of the 2022 ACM SIGSAC Conference on Computer and Communications Security, CCS 2022, Los Angeles, CA, USA, November 7\u201311, 2022, pages 769\u2013783. ACM, 2022."},{"key":"e_1_3_2_1_34_1","first-page":"468","volume-title":"29th USENIX Security Symposium, USENIX Security 2020","author":"Dessouky Ghada","year":"2020","unstructured":"Ghada Dessouky, Tommaso Frassetto, and Ahmad-Reza Sadeghi. Hybcache: Hybrid side-channel-resilient caches for trusted execution environments. In Srdjan Capkun and Franziska Roesner, editors, 29th USENIX Security Symposium, USENIX Security 2020, August 12\u201314, 2020, pages 451\u2013468. USENIX Association, 2020."},{"key":"e_1_3_2_1_35_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022","author":"Dettmers Tim","year":"2022","unstructured":"Tim Dettmers, Mike Lewis, Younes Belkada, and Luke Zettlemoyer. Gpt3.int8(): 8-bit matrix multiplication for transformers at scale. In Sanmi Koyejo, S. Mohamed, A. Agarwal, Danielle Belgrave, K. Cho, and A. Oh, editors, Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022, 2022."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.183"},{"key":"e_1_3_2_1_37_1","volume-title":"Yancheng Zheng, Haoqi Wu, Derun Zhao, Jin Tan, Zhicong Huang, Cheng Hong, Tao Wei, and Wenguang Chen. Puma: Secure inference of llama-7b in five minutes","author":"Dong Ye","year":"2023","unstructured":"Ye Dong, Wen jie Lu, Yancheng Zheng, Haoqi Wu, Derun Zhao, Jin Tan, Zhicong Huang, Cheng Hong, Tao Wei, and Wenguang Chen. Puma: Secure inference of llama-7b in five minutes, 2023."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00057"},{"key":"e_1_3_2_1_39_1","series-title":"Proceedings of Machine Learning Research","first-page":"10337","volume-title":"International Conference on Machine Learning, ICML","author":"Frantar Elias","year":"2023","unstructured":"Elias Frantar and Dan Alistarh. Sparsegpt: Massive language models can be accurately pruned in one-shot. In Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett, editors, International Conference on Machine Learning, ICML 2023, 23\u201329 July 2023, Honolulu, Hawaii, USA, volume 202 of Proceedings of Machine Learning Research, pages 10323\u201310337. PMLR, 2023."},{"key":"e_1_3_2_1_40_1","volume-title":"GPTQ: accurate post-training quantization for generative pre-trained transformers. CoRR, abs\/2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. GPTQ: accurate post-training quantization for generative pre-trained transformers. CoRR, abs\/2210.17323, 2022."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.67"},{"key":"e_1_3_2_1_42_1","volume-title":"Privado: Practical and secure dnn inference with enclaves. arXiv preprint arXiv:1810.00602","author":"Grover Karan","year":"2018","unstructured":"Karan Grover, Shruti Tople, Shweta Shinde, Ranjita Bhagwan, and Ramachandran Ramjee. Privado: Practical and secure dnn inference with enclaves. arXiv preprint arXiv:1810.00602, 2018."},{"key":"e_1_3_2_1_43_1","volume-title":"The Twelfth International Conference on Learning Representations, ICLR 2024","author":"Gu Yuxian","year":"2024","unstructured":"Yuxian Gu, Li Dong, Furu Wei, and Minlie Huang. Minillm: Knowledge distillation of large language models. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7\u201311, 2024. OpenReview.net, 2024."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3418297"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2016.11"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2023.23041"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00368"},{"key":"e_1_3_2_1_48_1","first-page":"700","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2022","author":"Hof Alexander Van't","year":"2022","unstructured":"Alexander Van't Hof and Jason Nieh. Blackbox: A container security monitor for protecting containers on untrusted operating systems. In Marcos K. Aguilera and Hakim Weatherspoon, editors, 16th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2022, Carlsbad, CA, USA, July 11\u201313, 2022, pages 683\u2013700. USENIX Association, 2022."},{"key":"e_1_3_2_1_49_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. CoRR, abs\/1704.04861","author":"Howard Andrew G.","year":"2017","unstructured":"Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. Mobilenets: Efficient convolutional neural networks for mobile vision applications. CoRR, abs\/1704.04861, 2017."},{"key":"e_1_3_2_1_50_1","first-page":"833","volume-title":"17th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2020","author":"Hunt Tyler","year":"2020","unstructured":"Tyler Hunt, Zhipeng Jia, Vance Miller, Ariel Szekely, Yige Hu, Christopher J. Rossbach, and Emmett Witchel. Telekine: Secure computing with cloud gpus. In Ranjita Bhagwan and George Porter, editors, 17th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2020, Santa Clara, CA, USA, February 25\u201327, 2020, pages 817\u2013833. USENIX Association, 2020."},{"key":"e_1_3_2_1_51_1","first-page":"164","volume-title":"Proceedings of the Thirteenth ACM Conference on Data and Application Security and Privacy, CODASPY 2023","author":"Islam Md Shihabul","year":"2023","unstructured":"Md Shihabul Islam, Mahmoud Zamani, Chung Hwan Kim, Latifur Khan, and Kevin W. Hamlen. Confidential execution of deep learning inference at the untrusted edge with ARM trustzone. In Mohamed Shehab, Maribel Fern\u00e1ndez, and Ninghui Li, editors, Proceedings of the Thirteenth ACM Conference on Data and Application Security and Privacy, CODASPY 2023, Charlotte, NC, USA, April 24\u201326, 2023, pages 153\u2013164. ACM, 2023."},{"key":"e_1_3_2_1_52_1","volume-title":"Faithful persona-based conversational dataset generation with large language models","author":"Jandaghi Pegah","year":"2023","unstructured":"Pegah Jandaghi, XiangHai Sheng, Xinyi Bai, Jay Pujara, and Hakim Sidahmed. Faithful persona-based conversational dataset generation with large language models, 2023."},{"key":"e_1_3_2_1_53_1","first-page":"468","volume-title":"Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2019","author":"Jang Insu","year":"2019","unstructured":"Insu Jang, Adrian Tang, Taehoon Kim, Simha Sethumadhavan, and Jaehyuk Huh. Heterogeneous isolated execution for commodity gpus. In Iris Bahar, Maurice Herlihy, Emmett Witchel, and Alvin R. Lebeck, editors, Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2019, Providence, RI, USA, April 13\u201317, 2019, pages 455\u2013468. ACM, 2019."},{"key":"e_1_3_2_1_54_1","first-page":"200","volume-title":"Proceedings of the 15th International Conference on Compilers, Architecture, and Synthesis for Embedded Systems, CASES 2012, part of the Eighth Embedded Systems Week, ESWeek 2012","author":"Jeong Jinkyu","year":"2012","unstructured":"Jinkyu Jeong, Hwanju Kim, Jeaho Hwang, Joonwon Lee, and Seungryoul Maeng. Daac: device-reserved memory as an eviction-based file cache. In Ahmed Jerraya, Luca P. Carloni, Vincent John Mooney III, and Rodric M. Rabbah, editors, Proceedings of the 15th International Conference on Compilers, Architecture, and Synthesis for Embedded Systems, CASES 2012, part of the Eighth Embedded Systems Week, ESWeek 2012, Tampere, Finland, October 7\u201312, 2012, pages 191\u2013200. ACM, 2012."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2025.3557971"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00019"},{"key":"e_1_3_2_1_57_1","first-page":"36","volume-title":"International Symposium on Memory Management, ISMM '98, Vancouver, British Columbia, Canada, 17\u201319 October, 1998, Conference Proceedings","author":"Mark","unstructured":"Mark S. Johnstone and Paul R. Wilson. The memory fragmentation problem: Solved? In Simon L. Peyton Jones and Richard E. Jones, editors, International Symposium on Memory Management, ISMM '98, Vancouver, British Columbia, Canada, 17\u201319 October, 1998, Conference Proceedings, pages 26\u201336. ACM, 1998."},{"key":"e_1_3_2_1_58_1","first-page":"1669","volume-title":"27th USENIX Security Symposium, USENIX Security 2018","author":"Juvekar Chiraag","year":"2018","unstructured":"Chiraag Juvekar, Vinod Vaikuntanathan, and Anantha P. Chandrakasan. GAZELLE: A low latency framework for secure neural network inference. In William Enck and Adrienne Porter Felt, editors, 27th USENIX Security Symposium, USENIX Security 2018, Baltimore, MD, USA, August 15\u201317, 2018, pages 1651\u20131669. USENIX Association, 2018."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00083"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387532"},{"key":"e_1_3_2_1_61_1","first-page":"17","volume-title":"The 25th Annual International Conference on Mobile Computing and Networking, MobiCom 2019","author":"Lee Taegyeong","year":"2019","unstructured":"Taegyeong Lee, Zhiqi Lin, Saumay Pushp, Caihua Li, Yunxin Liu, Youngki Lee, Fengyuan Xu, Chenren Xu, Lintao Zhang, and Junehwa Song. Occlumency: Privacy-preserving remote deep-learning inference using SGX. In Stephen A. Brewster, Geraldine Fitzpatrick, Anna L. Cox, and Vassilis Kostakos, editors, The 25th Annual International Conference on Mobile Computing and Networking, MobiCom 2019, Los Cabos, Mexico, October 21\u201325, 2019, pages 46:1\u201346:17. ACM, 2019."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477132.3483554"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680786"},{"key":"e_1_3_2_1_64_1","volume-title":"Proceedings of the Seventh Annual Conference on Machine Learning and Systems, MLSys 2024","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Wei-Ming Chen, Wei-Chen Wang, Guangxuan Xiao, Xingyu Dang, Chuang Gan, and Song Han. AWQ: activation-aware weight quantization for on-device LLM compression and acceleration. In Phillip B. Gibbons, Gennady Pekhimenko, and Christopher De Sa, editors, Proceedings of the Seventh Annual Conference on Machine Learning and Systems, MLSys 2024, Santa Clara, CA, USA, May 13\u201316, 2024. mlsys.org, 2024."},{"key":"e_1_3_2_1_65_1","first-page":"564","volume-title":"25th USENIX Security Symposium, USENIX Security 16","author":"Lipp Moritz","year":"2016","unstructured":"Moritz Lipp, Daniel Gruss, Raphael Spreitzer, Cl\u00e9mentine Maurice, and Stefan Mangard. Armageddon: Cache attacks on mobile devices. In Thorsten Holz and Stefan Savage, editors, 25th USENIX Security Symposium, USENIX Security 16, Austin, TX, USA, August 10\u201312, 2016, pages 549\u2013564. USENIX Association, 2016."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450268.3453524"},{"key":"e_1_3_2_1_67_1","volume-title":"The era of 1-bit llms: All large language models are in 1.58 bits. CoRR, abs\/2402.17764","author":"Ma Shuming","year":"2024","unstructured":"Shuming Ma, Hongyu Wang, Lingxiao Ma, Lei Wang, Wenhui Wang, Shaohan Huang, Li Dong, Ruiping Wang, Jilong Xue, and Furu Wei. The era of 1-bit llms: All large language models are in 1.58 bits. CoRR, abs\/2402.17764, 2024."},{"key":"e_1_3_2_1_68_1","volume-title":"Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023","author":"Ma Xinyin","year":"2023","unstructured":"Xinyin Ma, Gongfan Fang, and Xinchao Wang. Llm-pruner: On the structural pruning of large language models. In Alice Oh, Tristan Naumann, Amir Globerson, Kate Saenko, Moritz Hardt, and Sergey Levine, editors, Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023, 2023."},{"key":"e_1_3_2_1_69_1","first-page":"172","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2023","author":"Mai Haohui","year":"2023","unstructured":"Haohui Mai, Jiacheng Zhao, Hongren Zheng, Yiyang Zhao, Zibin Liu, Mingyu Gao, Cong Wang, Huimin Cui, Xiaobing Feng, and Christos Kozyrakis. Honeycomb: Secure and efficient GPU executions via static validation. In Roxana Geambasu and Ed Nightingale, editors, 17th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2023, Boston, MA, USA, July 10\u201312, 2023, pages 155\u2013172. USENIX Association, 2023."},{"key":"e_1_3_2_1_70_1","first-page":"2522","volume-title":"29th USENIX Security Symposium (USENIX Security 20)","author":"Mishra Pratyush","unstructured":"Pratyush Mishra, Ryan Lehmkuhl, Akshayaram Srinivasan, Wenting Zheng, and Raluca Ada Popa. Delphi: A cryptographic inference service for neural networks. In 29th USENIX Security Symposium (USENIX Security 20), pages 2505\u20132522. USENIX Association, August 2020."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLOUD62652.2024.00028"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2018.2869169"},{"key":"e_1_3_2_1_73_1","first-page":"738","volume-title":"Proceedings of the 2022 USENIX Annual Technical Conference, USENIX ATC 2022","author":"Shen Tianxiang","year":"2022","unstructured":"Tianxiang Shen, Ji Qi, Jianyu Jiang, Xian Wang, Siyuan Wen, Xusheng Chen, Shixiong Zhao, Sen Wang, Li Chen, Xiapu Luo, Fengwei Zhang, and Heming Cui. SOTER: guarding black-box inference for general neural networks at the edge. In Jiri Schindler and Noa Zilberman, editors, Proceedings of the 2022 USENIX Annual Technical Conference, USENIX ATC 2022, Carlsbad, CA, USA, July 11\u201313, 2022, pages 723\u2013738. USENIX Association, 2022."},{"key":"e_1_3_2_1_74_1","volume-title":"ACAI: extending arm confidential computing architecture protection from cpus to accelerators. CoRR, abs\/2305.15986","author":"Sridhara Supraja","year":"2023","unstructured":"Supraja Sridhara, Andrin Bertschi, Benedict Schl\u00fcter, Mark Kuhne, Fabio Aliberti, and Shweta Shinde. ACAI: extending arm confidential computing architecture protection from cpus to accelerators. CoRR, abs\/2305.15986, 2023."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP61157.2025.00001"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP46215.2023.10179382"},{"key":"e_1_3_2_1_77_1","first-page":"1972","volume-title":"30th USENIX Security Symposium, USENIX Security 2021","author":"Sun Zhichuang","year":"2021","unstructured":"Zhichuang Sun, Ruimin Sun, Long Lu, and Alan Mislove. Mind your weight(s): A large-scale study on insufficient machine learning model protection in mobile apps. In Michael D. Bailey and Rachel Greenstadt, editors, 30th USENIX Security Symposium, USENIX Security 2021, August 11\u201313, 2021, pages 1955\u20131972. USENIX Association, 2021."},{"key":"e_1_3_2_1_78_1","volume-title":"Pipellm: Fast and confidential large language model services with speculative pipelined encryption. arXiv preprint arXiv:2411.03357","author":"Tan Yifan","year":"2024","unstructured":"Yifan Tan, Cheng Tan, Zeyu Mi, and Haibo Chen. Pipellm: Fast and confidential large language model services with speculative pipelined encryption. arXiv preprint arXiv:2411.03357, 2024."},{"key":"e_1_3_2_1_79_1","first-page":"696","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2018","author":"Volos Stavros","year":"2018","unstructured":"Stavros Volos, Kapil Vaswani, and Rodrigo Bruno. Graviton: Trusted execution environments on gpus. In Andrea C. Arpaci-Dusseau and Geoff Voelker, editors, 13th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2018, Carlsbad, CA, USA, October 8\u201310, 2018, pages 681\u2013696. USENIX Association, 2018."},{"key":"e_1_3_2_1_80_1","volume-title":"31st Annual Network and Distributed System Security Symposium, NDSS 2024","author":"Wang Chenxu","year":"2024","unstructured":"Chenxu Wang, Fengwei Zhang, Yunjie Deng, Kevin Leach, Jiannong Cao, Zhenyu Ning, Shoumeng Yan, and Zhengyu He. CAGE: complementing arm CCA with GPU extensions. In 31st Annual Network and Distributed System Security Symposium, NDSS 2024, San Diego, California, USA, February 26 - March 1, 2024. The Internet Society, 2024."},{"key":"e_1_3_2_1_81_1","first-page":"298","volume-title":"34th USENIX Security Symposium (USENIX Security 25)","author":"Wang Pengli","year":"2025","unstructured":"Pengli Wang, Bingyou Dong, Yifeng Cai, Zheng Zhang, Junlin Liu, Huanran Xue, Ye Wu, Yao Zhang, and Ziqi Zhang. Game of arrows: On the ({In-) Security} of weight obfuscation for {On-Device}{TEE-Shielded}{LLM} partition algorithms. In 34th USENIX Security Symposium (USENIX Security 25), pages 279\u2013298, 2025."},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS55109.2022.00025"},{"key":"e_1_3_2_1_83_1","first-page":"557","volume-title":"Proceedings of the 30th Annual International Conference on Mobile Computing and Networking, ACM MobiCom 2024","author":"Wen Hao","year":"2024","unstructured":"Hao Wen, Yuanchun Li, Guohong Liu, Shanhui Zhao, Tao Yu, Toby Jia-Jun Li, Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu. Autodroid: Llm-powered task automation in android. In Weisong Shi, Deepak Ganesan, and Nicholas D. Lane, editors, Proceedings of the 30th Annual International Conference on Mobile Computing and Networking, ACM MobiCom 2024, Washington D.C., DC, USA, November 18\u201322, 2024, pages 543\u2013557. ACM, 2024."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620678.3624659"},{"key":"e_1_3_2_1_85_1","first-page":"462","volume-title":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","volume":"1","author":"Xu Daliang","year":"2025","unstructured":"Daliang Xu, Hao Zhang, Liming Yang, Ruiqi Liu, Gang Huang, Mengwei Xu, and Xuanzhe Liu. Fast on-device LLM inference with npus. In Lieven Eeckhout, Georgios Smaragdakis, Kaitai Liang, Adrian Sampson, Martha A. Kim, and Christopher J. Rossbach, editors, Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1, ASPLOS 2025, Rotterdam, The Netherlands, 30 March 2025 - 3 April 2025, pages 445\u2013462. ACM, 2025."},{"key":"e_1_3_2_1_86_1","volume-title":"On-device language models: A comprehensive review. CoRR, abs\/2409.00088","author":"Xu Jiajun","year":"2024","unstructured":"Jiajun Xu, Zhiyuan Li, Wei Chen, Qun Wang, Xin Gao, Qi Cai, and Ziyuan Ling. On-device language models: A comprehensive review. CoRR, abs\/2409.00088, 2024."},{"key":"e_1_3_2_1_87_1","volume-title":"A survey of resource-efficient LLM and multimodal foundation models. CoRR, abs\/2401.08092","author":"Xu Mengwei","year":"2024","unstructured":"Mengwei Xu, Wangsong Yin, Dongqi Cai, Rongjie Yi, Daliang Xu, Qipeng Wang, Bingyang Wu, Yihao Zhao, Chen Yang, Shihe Wang, Qiyang Zhang, Zhenyan Lu, Li Zhang, Shangguang Wang, Yuanchun Li, Yunxin Liu, Xin Jin, and Xuanzhe Liu. A survey of resource-efficient LLM and multimodal foundation models. CoRR, abs\/2401.08092, 2024."},{"key":"e_1_3_2_1_88_1","volume-title":"Powerinfer-2: Fast large language model inference on a smartphone. CoRR, abs\/2406.06282","author":"Xue Zhenliang","year":"2024","unstructured":"Zhenliang Xue, Yixin Song, Zeyu Mi, Le Chen, Yubin Xia, and Haibo Chen. Powerinfer-2: Fast large language model inference on a smartphone. CoRR, abs\/2406.06282, 2024."},{"key":"e_1_3_2_1_89_1","volume-title":"abs\/2412.15115","author":"Yang An","year":"2024","unstructured":"An Yang, Baosong Yang, Beichen Zhang, Binyuan Hui, Bo Zheng, Bowen Yu, Chengyuan Li, Dayiheng Liu, Fei Huang, Haoran Wei, Huan Lin, Jian Yang, Jianhong Tu, Jianwei Zhang, Jianxin Yang, Jiaxi Yang, Jingren Zhou, Junyang Lin, Kai Dang, Keming Lu, Keqin Bao, Kexin Yang, Le Yu, Mei Li, Mingfeng Xue, Pei Zhang, Qin Zhu, Rui Men, Runji Lin, Tianhao Li, Tingyu Xia, Xingzhang Ren, Xuancheng Ren, Yang Fan, Yang Su, Yichang Zhang, Yu Wan, Yuqiong Liu, Zeyu Cui, Zhenru Zhang, and Zihan Qiu. Qwen2.5 technical report. CoRR, abs\/2412.15115, 2024."},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2024.03.008"},{"key":"e_1_3_2_1_91_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022","author":"Yao Zhewei","year":"2022","unstructured":"Zhewei Yao, Reza Yazdani Aminabadi, Minjia Zhang, Xiaoxia Wu, Conglong Li, and Yuxiong He. Zeroquant: Efficient and affordable post-training quantization for large-scale transformers. In Sanmi Koyejo, S. Mohamed, A. Agarwal, Danielle Belgrave, K. Cho, and A. Oh, editors, Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022, 2022."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.10"},{"key":"e_1_3_2_1_93_1","first-page":"304","volume-title":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","volume":"2","author":"Zhang Jiyuan","year":"2024","unstructured":"Jiyuan Zhang, Weiwei Jia, Siyuan Chai, Peizhe Liu, Jongyul Kim, and Tianyin Xu. Direct memory translation for virtualized clouds. In Rajiv Gupta, Nael B. Abu-Ghazaleh, Madan Musuvathi, and Dan Tsafrir, editors, Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2, ASPLOS 2024, La Jolla, CA, USA, 27 April 2024- 1 May 2024, pages 287\u2013304. ACM, 2024."},{"key":"e_1_3_2_1_94_1","volume-title":"Thomas Hou. Truspy: Cache side-channel information leakage from the secure world on ARM devices. IACR Cryptol. ePrint Arch., page 980","author":"Zhang Ning","year":"2016","unstructured":"Ning Zhang, Kun Sun, Deborah Shands, Wenjing Lou, and Y. Thomas Hou. Truspy: Cache side-channel information leakage from the secure world on ARM devices. IACR Cryptol. ePrint Arch., page 980, 2016."},{"key":"e_1_3_2_1_95_1","volume-title":"Tinyllama: An open-source small language model","author":"Zhang Peiyuan","year":"2024","unstructured":"Peiyuan Zhang, Guangtao Zeng, Tianduo Wang, and Wei Lu. Tinyllama: An open-source small language model, 2024."},{"key":"e_1_3_2_1_96_1","volume-title":"Forty-first international conference on machine learning","author":"Zhang Zheng","year":"2024","unstructured":"Zheng Zhang, Na Wang, Ziqi Zhang, Yao Zhang, Tianyi Zhang, Jianwei Liu, and Ye Wu. Groupcover: a secure, efficient and scalable inference framework for on-device model protection based on tees. In Forty-first international conference on machine learning, 2024."},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP54263.2024.00052"},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2014.27"},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP40000.2020.00054"}],"event":{"name":"EUROSYS '26: 21st European Conference on Computer Systems","location":"McEwan Hall\/The University of Edinburgh Edinburgh Scotland UK","acronym":"EUROSYS '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 21st European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3767295.3769334","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T12:02:34Z","timestamp":1780660954000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3767295.3769334"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,26]]},"references-count":99,"alternative-id":["10.1145\/3767295.3769334","10.1145\/3767295"],"URL":"https:\/\/doi.org\/10.1145\/3767295.3769334","relation":{},"subject":[],"published":{"date-parts":[[2026,4,26]]},"assertion":[{"value":"2026-04-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}