{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T19:05:17Z","timestamp":1776366317642,"version":"3.51.2"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T00:00:00Z","timestamp":1745798400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T00:00:00Z","timestamp":1745798400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100005320","name":"Xidian University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100005320","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1109\/aicas64808.2025.11173155","type":"proceedings-article","created":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T17:52:35Z","timestamp":1758822755000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Optimizing Inference Performance for Large Language Models on ARMv9 Architecture"],"prefix":"10.1109","author":[{"given":"Longhao","family":"Chen","sequence":"first","affiliation":[{"name":"Hangzhou Dianzi University,Zhuoyue Honors College,Hangzhou,China"}]},{"given":"Cheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Huiyuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University,Zhuoyue Honors College,Hangzhou,China"}]},{"given":"Chi","family":"Wang","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University,School of Automation,Hangzhou,China"}]},{"given":"Yina","family":"Zhao","sequence":"additional","affiliation":[{"name":"Wuhan University,School of Geodesy and Geomatics,Wuhan,China"}]},{"given":"Xiaoxi","family":"Li","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Xiguang","family":"Wu","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Wei","family":"Mao","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Genquan","family":"Han","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2979670"},{"key":"ref2","author":"Shoeybi","year":"2019","journal-title":"Megatron-lm: Training multi-billion parameter language models using model parallelism"},{"key":"ref3","first-page":"6265","article-title":"BASE layers: Simplifying training of large, sparse models","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML","volume":"139","author":"Lewis"},{"key":"ref4","article-title":"OPTQ: accurate quantization for generative pre-trained transformers","volume-title":"The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023","author":"Frantar"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2022.3210069"},{"key":"ref6","author":"Hooper","year":"2024","journal-title":"Kvquant: Towards 10 million context length llm inference with kv cache quantization"},{"key":"ref7","article-title":"Model compression and efficient inference for large language models: A survey","author":"Wang","year":"2024"},{"key":"ref8","article-title":"Training llms over neurally compressed text","author":"Lester","year":"2024"},{"key":"ref9","article-title":"Yitian 710"},{"key":"ref10","article-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks","volume-title":"7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019","author":"Frankle"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00626-4"},{"key":"ref12","author":"Han","year":"2015","journal-title":"Deep compression: Compressing deep neural network with pruning, trained quantization and huffman coding"},{"key":"ref13","article-title":"llama.cpp\/ggml-common.h at 132f55 \u00b7 ggerganov\/llama.cpp \u2014 github.com","author":"Gerganov"},{"key":"ref14","year":"2023","journal-title":"Arm Neoverse V2 Core Software Optimization Guide"},{"key":"ref15","volume-title":"Computer architecture: a quantitative approach","author":"Hennessy","year":"2017"},{"key":"ref16","article-title":"Arm Neon Technology"},{"key":"ref17","article-title":"Neon \u2014 developer.arm.com"},{"key":"ref18","article-title":"LTO (GNU Compiler Collection (GCC) Internals) \u2014 gcc.gnu.org"},{"key":"ref19","author":"Bai","year":"2023","journal-title":"Qwen technical report"},{"key":"ref20","article-title":"GitHub-ggerganov\/llama.cpp: LLM inference in C\/C++","author":"Gerganov","year":"2023"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"},{"key":"ref22","article-title":"A framework for few-shot language model evaluation","author":"Gao","year":"2023"}],"event":{"name":"2025 IEEE 7th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","location":"Bordeaux, France","start":{"date-parts":[[2025,4,28]]},"end":{"date-parts":[[2025,4,30]]}},"container-title":["2025 IEEE 7th International Conference on Artificial Intelligence Circuits and Systems (AICAS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11172731\/11173086\/11173155.pdf?arnumber=11173155","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T12:51:32Z","timestamp":1759236692000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11173155\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,28]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/aicas64808.2025.11173155","relation":{},"subject":[],"published":{"date-parts":[[2025,4,28]]}}}