{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T16:00:35Z","timestamp":1759334435040,"version":"build-2065373602"},"reference-count":15,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T00:00:00Z","timestamp":1745798400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T00:00:00Z","timestamp":1745798400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006190","name":"Research and Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006190","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005320","name":"Xidian University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100005320","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1109\/aicas64808.2025.11173152","type":"proceedings-article","created":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T17:52:35Z","timestamp":1758822755000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["Hardware-Aware Optimization of Large Language Models for Enhanced Throughput on Arm CPUs"],"prefix":"10.1109","author":[{"given":"Xingyu","family":"Zhu","sequence":"first","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Longhao","family":"Chen","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University,Zhuoyue Honors College,Hangzhou,China"}]},{"given":"Cheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Pengcheng","family":"Yang","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Tingjie","family":"Yang","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Guosheng","family":"Yu","sequence":"additional","affiliation":[{"name":"T-HEAD Semiconductor Co., Ltd,China"}]},{"given":"Evens","family":"Pan","sequence":"additional","affiliation":[{"name":"Arm Technology (China) Co., Ltd,China"}]},{"given":"Xiguang","family":"Wu","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Wei","family":"Mao","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Genquan","family":"Han","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"article-title":"Qwen technical report","year":"2023","author":"Bai","key":"ref3"},{"article-title":"OPTQ: Accurate quantization for generative pre-trained transformers","volume-title":"The International Conference on Learning Representations (ICLR)","author":"Frantar","key":"ref4"},{"key":"ref5","first-page":"87","article-title":"Awq: Activation-aware weight quantization for on-device llm compression and acceleration","volume-title":"Proceedings of Machine Learning and Systems (MLSys)","volume":"6","author":"Lin"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2022.3210069"},{"article-title":"Flashattention: fast and memory-efficient exact attention with io-awareness","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Dao","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00042"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"article-title":"SuperCLUE Total Leaderboard (August 2024)","year":"2024","author":"AI","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2022.3212314"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.2976475"},{"article-title":"SliceGPT: Compress large language models by deleting rows and columns","volume-title":"The International Conference on Learning Representations (ICLR)","author":"Ashkboos","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"}],"event":{"name":"2025 IEEE 7th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","start":{"date-parts":[[2025,4,28]]},"location":"Bordeaux, France","end":{"date-parts":[[2025,4,30]]}},"container-title":["2025 IEEE 7th International Conference on Artificial Intelligence Circuits and Systems (AICAS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11172731\/11173086\/11173152.pdf?arnumber=11173152","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T15:56:26Z","timestamp":1759247786000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11173152\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,28]]},"references-count":15,"URL":"https:\/\/doi.org\/10.1109\/aicas64808.2025.11173152","relation":{},"subject":[],"published":{"date-parts":[[2025,4,28]]}}}