{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T13:04:46Z","timestamp":1780664686002,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":79,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,4,26]],"date-time":"2026-04-26T00:00:00Z","timestamp":1777161600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Research Foundation of Korea (NRF)","award":["RS-2024-00344323"],"award-info":[{"award-number":["RS-2024-00344323"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3767295.3803619","type":"proceedings-article","created":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T20:20:04Z","timestamp":1777062004000},"page":"1812-1828","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["viNPU: Optimizing Vision Transformer Inference on Mobile NPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9035-2602","authenticated-orcid":false,"given":"Jeho","family":"Lee","sequence":"first","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2584-8741","authenticated-orcid":false,"given":"Gunjoong","family":"Kim","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7279-6397","authenticated-orcid":false,"given":"Chanyoung","family":"Jung","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9877-2368","authenticated-orcid":false,"given":"Jaehee","family":"Kim","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7336-6295","authenticated-orcid":false,"given":"Seonghoon","family":"Park","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9060-5091","authenticated-orcid":false,"given":"Hojung","family":"Cha","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,4,26]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AIMET. 2025. AIMET Documentation. https:\/\/quic.github.io\/aimet-pages\/AimetDocs\/index.html."},{"key":"e_1_3_2_1_2_1","unstructured":"AIMET. 2025. Automatic mixed precision. https:\/\/quic.github.io\/aimet-pages\/AimetDocs\/techniques\/mixed_precision\/amp.html#featureguide-amp."},{"key":"e_1_3_2_1_3_1","unstructured":"Apple. 2023. Introducing Apple Vision Pro: Apple's first spatial computer. https:\/\/www.apple.com\/newsroom\/2023\/06\/introducing-apple-vision-pro\/."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","unstructured":"Yelysei Bondarenko Markus Nagel and Tijmen Blankevoort. 2021. Understanding and Overcoming the Challenges of Efficient Transformer Quantization. 7947\u20137969. 10.18653\/v1\/2021.emnlpmain.627","DOI":"10.18653\/v1\/2021.emnlpmain.627"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3731569.3764808"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 13th USENIX Conference on Operating Systems Design and Implementation","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Meghan Cowan, Haichen Shen, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: an automated end-to-end optimizing compiler for deep learning. In Proceedings of the 13th USENIX Conference on Operating Systems Design and Implementation (Carlsbad, CA, USA) (OSDI'18). USENIX Association, USA, 579\u2013594."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 38th International Conference on Neural Information Processing Systems","author":"Chen Zigeng","year":"2025","unstructured":"Zigeng Chen, Gongfan Fang, Xinyin Ma, and Xinchao Wang. 2025. SlimSAM: 0.1% data makes segment anything slim. In Proceedings of the 38th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS '24). Curran Associates Inc., Red Hook, NY, USA, Article 1246, 28 pages."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Tri Dao Daniel Y. Fu Stefano Ermon Atri Rudra and Christopher R\u00e9. 2022. FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness. In Advances in Neural Information Processing Systems (NeurIPS).","DOI":"10.52202\/068431-1189"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2021.3138384"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Dettmers Tim","year":"2022","unstructured":"Tim Dettmers, Mike Lewis, Younes Belkada, and Luke Zettlemoyer. 2022. LLM.int8(): 8-bit matrix multiplication for transformers at scale. In Proceedings of the 36th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS '22). Curran Associates Inc., Red Hook, NY, USA, Article 2198, 15 pages."},{"key":"e_1_3_2_1_13_1","unstructured":"Android Developers. 2025. Android Debug Bridge (adb). https:\/\/developer.android.com\/tools\/adb."},{"key":"e_1_3_2_1_14_1","unstructured":"Android Developers. 2025. BatteryManager. https:\/\/developer.android.com\/reference\/android\/os\/BatteryManager."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547826"},{"key":"e_1_3_2_1_16_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_17_1","unstructured":"FLIR. 2025. FLIR E4. https:\/\/www.flir.com\/support\/products\/e4\/#Overview."},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Frantar Elias","year":"2023","unstructured":"Elias Frantar and Dan Alistarh. 2023. SparseGPT: massive language models can be accurately pruned in one-shot. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML'23). JMLR.org, Article 414, 15 pages."},{"key":"e_1_3_2_1_19_1","volume-title":"Model tells you what to discard: Adaptive kv cache compression for llms. arXiv preprint arXiv:2310.01801","author":"Ge Suyu","year":"2023","unstructured":"Suyu Ge, Yunan Zhang, Liyuan Liu, Minjia Zhang, Jiawei Han, and Jianfeng Gao. 2023. Model tells you what to discard: Adaptive kv cache compression for llms. arXiv preprint arXiv:2310.01801 (2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913491297"},{"key":"e_1_3_2_1_21_1","unstructured":"Google. 2025. LiteRT overview. https:\/\/ai.google.dev\/edge\/litert."},{"key":"e_1_3_2_1_22_1","unstructured":"Google. 2025. TPU architecture. https:\/\/cloud.google.com\/tpu\/docs\/system-architecture-tpu-vm#chips."},{"key":"e_1_3_2_1_23_1","unstructured":"HoYoverse. 2025. Genshin Impact. https:\/\/genshin.hoyoverse.com\/en\/."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538948"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538932"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of Machine Learning and Systems","author":"Jiang Xiaotang","year":"2020","unstructured":"Xiaotang Jiang, Huan Wang, Yiliu Chen, Ziqi Wu, Lichuan Wang, Bin Zou, Yafeng Yang, Zongyang Cui, Yu Cai, Tianhang Yu, et al. 2020. MNN: A universal and efficient inference engine. Proceedings of Machine Learning and Systems (2020)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3711875.3729161"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3300061.3300128"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575747"},{"key":"e_1_3_2_1_30_1","volume-title":"International conference on algorithmic learning theory. PMLR, 597\u2013619","author":"Keles Feyza Duman","year":"2023","unstructured":"Feyza Duman Keles, Pruthuvi Mahesakya Wijewardena, and Chinmay Hegde. 2023. On the computational complexity of self-attention. In International conference on algorithmic learning theory. PMLR, 597\u2013619."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680207.3765267"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303950"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_2_1_34_1","unstructured":"Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3690688"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3643832.3661891"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3030548"},{"key":"e_1_3_2_1_38_1","volume-title":"Q-vit: Fully differentiable quantization for vision transformer. arXiv preprint arXiv:2201.07703","author":"Li Zhexin","year":"2022","unstructured":"Zhexin Li, Tong Yang, Peisong Wang, and Jian Cheng. 2022. Q-vit: Fully differentiable quantization for vision transformer. arXiv preprint arXiv:2201.07703 (2022)."},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of Machine Learning and Systems, P. Gibbons, G. Pekhimenko, and C. De Sa (Eds.)","volume":"6","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Wei-Ming Chen, Wei-Chen Wang, Guangxuan Xiao, Xingyu Dang, Chuang Gan, and Song Han. 2024. AWQ: Activation-aware Weight Quantization for On-Device LLM Compression and Acceleration. In Proceedings of Machine Learning and Systems, P. Gibbons, G. Pekhimenko, and C. De Sa (Eds.), Vol. 6. 87\u2013100. https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2024\/file\/42a452cbafa9dd64e9ba4aa95cc1ef21-Paper-Conference.pdf"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_41_1","volume-title":"Fq-vit: Post-training quantization for fully quantized vision transformer. arXiv preprint arXiv:2111.13824","author":"Lin Yang","year":"2021","unstructured":"Yang Lin, Tianyu Zhang, Peiqin Sun, Zheng Li, and Shuchang Zhou. 2021. Fq-vit: Post-training quantization for fully quantized vision transformer. arXiv preprint arXiv:2111.13824 (2021)."},{"key":"e_1_3_2_1_42_1","volume-title":"Fq-vit: Post-training quantization for fully quantized vision transformer. arXiv preprint arXiv:2111.13824","author":"Lin Yang","year":"2021","unstructured":"Yang Lin, Tianyu Zhang, Peiqin Sun, Zheng Li, and Shuchang Zhou. 2021. Fq-vit: Post-training quantization for fully quantized vision transformer. arXiv preprint arXiv:2111.13824 (2021)."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Liu Zichang","year":"2023","unstructured":"Zichang Liu, Jue Wang, Tri Dao, Tianyi Zhou, Binhang Yuan, Zhao Song, Anshumali Shrivastava, Ce Zhang, Yuandong Tian, Christopher R\u00e9, and Beidi Chen. 2023. Deja Vu: contextual sparsity for efficient LLMs at inference time. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML'23). JMLR.org, Article 919, 40 pages."},{"key":"e_1_3_2_1_44_1","first-page":"28092","article-title":"Post-training quantization for vision transformer","volume":"34","author":"Liu Zhenhua","year":"2021","unstructured":"Zhenhua Liu, Yunhe Wang, Kai Han, Wei Zhang, Siwei Ma, and Wen Gao. 2021. Post-training quantization for vision transformer. Advances in Neural Information Processing Systems 34 (2021), 28092\u201328103.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Ma Xinyin","year":"2023","unstructured":"Xinyin Ma, Gongfan Fang, and Xinchao Wang. 2023. LLM-pruner: on the structural pruning of large language models. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS '23). Curran Associates Inc., Red Hook, NY, USA, Article 950, 19 pages."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/HCS59251.2023.10254715"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.3390\/s20051446"},{"key":"e_1_3_2_1_48_1","unstructured":"MediaTek. 2025. Leading the Generative AI Technology Revolution. https:\/\/i.mediatek.com\/ai."},{"key":"e_1_3_2_1_49_1","volume-title":"Garnett (Eds.)","volume":"32","author":"Michel Paul","year":"2019","unstructured":"Paul Michel, Omer Levy, and Graham Neubig. 2019. Are Sixteen Heads Really Better than One?. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/file\/2c601ad9d2ff9bc8b282670cdd54f69f-Paper.pdf"},{"key":"e_1_3_2_1_50_1","volume-title":"Yelysei Bondarenko, Mart Van Baalen, and Tijmen Blankevoort.","author":"Nagel Markus","year":"2021","unstructured":"Markus Nagel, Marios Fournarakis, Rana Ali Amjad, Yelysei Bondarenko, Mart Van Baalen, and Tijmen Blankevoort. 2021. A white paper on neural network quantization. arXiv preprint arXiv:2106.08295 (2021)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454083"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651384"},{"key":"e_1_3_2_1_53_1","unstructured":"NVIDIA. 2025. What Is Spatial Computing? https:\/\/www.nvidia.com\/en-us\/glossary\/spatial-computing\/."},{"key":"e_1_3_2_1_54_1","unstructured":"ONNX. 2025. ONNX Concepts. https:\/\/onnx.ai\/onnx\/intro\/concepts.html."},{"key":"e_1_3_2_1_55_1","unstructured":"ONNX. 2025. ONNX with Python. https:\/\/onnx.ai\/onnx\/intro\/python.html."},{"key":"e_1_3_2_1_56_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581791.3596851"},{"key":"e_1_3_2_1_58_1","unstructured":"Qualcomm. 2025. Qualcomm AI Engine Direct SDK. https:\/\/www.qualcomm.com\/developer\/software\/qualcomm-ai-engine-direct-sdk."},{"key":"e_1_3_2_1_59_1","volume-title":"International conference on machine learning. PmLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_2_1_60_1","unstructured":"ONNX Runtime. 2025. Graph Optimizations in ONNX Runtime. https:\/\/onnxruntime.ai\/docs\/performance\/model-optimizations\/graph-optimizations.html."},{"key":"e_1_3_2_1_61_1","unstructured":"ONNX Runtime. 2025. Welcome to ONNX Runtime. https:\/\/onnxruntime.ai\/docs\/."},{"key":"e_1_3_2_1_62_1","unstructured":"Samsung. 2025. Samsung's Pivotal Role in Pioneering On-Device Generative AI. https:\/\/semiconductor.samsung.com\/news-events\/tech-blog\/samsungs-pivotal-role-in-pioneering-on-device-generative-ai\/."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3696067"},{"key":"e_1_3_2_1_64_1","volume-title":"MAS-Attention: Memory-Aware Stream Processing for Attention Acceleration on Resource-Constrained Edge Devices. arXiv preprint arXiv:2411.17720","author":"Shakerdargah Mohammadali","year":"2024","unstructured":"Mohammadali Shakerdargah, Shan Lu, Chao Gao, and Di Niu. 2024. MAS-Attention: Memory-Aware Stream Processing for Attention Acceleration on Resource-Constrained Edge Devices. arXiv preprint arXiv:2411.17720 (2024)."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3694715.3695964"},{"key":"e_1_3_2_1_66_1","unstructured":"StereoLabs. 2025. ZED 2. https:\/\/www.stereolabs.com\/products\/zed-2."},{"key":"e_1_3_2_1_67_1","volume-title":"2023 USENIX Annual Technical Conference (USENIX ATC 23)","author":"Sung Hsin-Hsuan","year":"2023","unstructured":"Hsin-Hsuan Sung, Jou-An Chen, Wei Niu, Jiexiong Guan, Bin Ren, and Xipeng Shen. 2023. Decentralized {Application-Level} adaptive scheduling for {Multi-Instance}{DNNs} on open mobile devices. In 2023 USENIX Annual Technical Conference (USENIX ATC 23). 865\u2013877."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1580"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"e_1_3_2_1_71_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Ji Lin, Mickael Seznec, Hao Wu, Julien Demouth, and Song Han. 2023. SmoothQuant: accurate and efficient post-training quantization for large language models. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML'23). JMLR.org, Article 1585, 13 pages."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707239"},{"key":"e_1_3_2_1_73_1","volume-title":"Powerinfer-2: Fast large language model inference on a smartphone. arXiv preprint arXiv:2406.06282","author":"Xue Zhenliang","year":"2024","unstructured":"Zhenliang Xue, Yixin Song, Zeyu Mi, Xinrui Zheng, Yubin Xia, and Haibo Chen. 2024. Powerinfer-2: Fast large language model inference on a smartphone. arXiv preprint arXiv:2406.06282 (2024)."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"e_1_3_2_1_75_1","volume-title":"Proceedings of the 38th International Conference on Neural Information Processing Systems","author":"Yang Lihe","year":"2025","unstructured":"Lihe Yang, Bingyi Kang, Zilong Huang, Zhen Zhao, Xiaogang Xu, Jiashi Feng, and Hengshuang Zhao. 2025. Depth anything V2. In Proceedings of the 38th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS '24). Curran Associates Inc., Red Hook, NY, USA, Article 688, 37 pages."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02170"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19775-8_12"},{"key":"e_1_3_2_1_78_1","volume-title":"Proceedings of Machine Learning and Systems, D. Song, M. Carbin, and T. Chen (Eds.)","volume":"5","author":"Zhang Zining","year":"2023","unstructured":"Zining Zhang, Bingsheng He, and Zhenjie Zhang. 2023. Practical Edge Kernels for Integer-Only Vision Transformers Under Post-training Quantization. In Proceedings of Machine Learning and Systems, D. Song, M. Carbin, and T. Chen (Eds.), Vol. 5. Curan, 35\u201347. https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2023\/file\/12f429641e5c63a9ca7fd1c5c4804d32-Paper-mlsys2023.pdf"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.544"}],"event":{"name":"EUROSYS '26: 21st European Conference on Computer Systems","location":"McEwan Hall\/The University of Edinburgh Edinburgh Scotland UK","acronym":"EUROSYS '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 21st European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3767295.3803619","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T12:11:32Z","timestamp":1780661492000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3767295.3803619"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,26]]},"references-count":79,"alternative-id":["10.1145\/3767295.3803619","10.1145\/3767295"],"URL":"https:\/\/doi.org\/10.1145\/3767295.3803619","relation":{},"subject":[],"published":{"date-parts":[[2026,4,26]]},"assertion":[{"value":"2026-04-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}