{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T01:22:40Z","timestamp":1760059360640,"version":"build-2065373602"},"reference-count":19,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,22]]},"DOI":"10.1109\/aicas59952.2024.10595886","type":"proceedings-article","created":{"date-parts":[[2024,7,19]],"date-time":"2024-07-19T13:30:48Z","timestamp":1721395848000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["AICAS Grand Challenge 2024: Software and Hardware Co-optimization for General Large Language Model Inference on CPU"],"prefix":"10.1109","author":[{"given":"Junfeng","family":"Tan","sequence":"first","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Guosheng","family":"Yu","sequence":"additional","affiliation":[{"name":"T-HEAD Semiconductor Co., Ltd,China"}]},{"given":"Jianing","family":"Li","sequence":"additional","affiliation":[{"name":"Nanjing University,School of Electronic Science and Engineering,China"}]},{"given":"Xiaohan","family":"Ma","sequence":"additional","affiliation":[{"name":"T-HEAD Semiconductor Co., Ltd,China"}]},{"given":"Fang","family":"Bao","sequence":"additional","affiliation":[{"name":"Arm Technology (China) Co., Ltd."}]},{"given":"Evens","family":"Pan","sequence":"additional","affiliation":[{"name":"Arm Technology (China) Co., Ltd."}]},{"given":"David","family":"Bian","sequence":"additional","affiliation":[{"name":"Arm Technology (China) Co., Ltd."}]},{"given":"Yongfu","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Department of Micro-Nano Electronics,China"}]},{"given":"Yuan","family":"Du","sequence":"additional","affiliation":[{"name":"Nanjing University,School of Electronic Science and Engineering,China"}]},{"given":"Li","family":"Du","sequence":"additional","affiliation":[{"name":"Nanjing University,School of Electronic Science and Engineering,China"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]},{"given":"Wei","family":"Mao","sequence":"additional","affiliation":[{"name":"Xidian University,Hangzhou Institute of Technology,Hangzhou,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454083"},{"key":"ref3","article-title":"Large scale distributed deep networks","volume-title":"Advances in neural information processing systems","volume":"25","author":"Dean"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/BioCAS54905.2022.9948551"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/BioCAS58349.2023.10388719"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/AICAS57966.2023.10168648"},{"article-title":"Qwen technical report","year":"2023","author":"Bai","key":"ref7"},{"key":"ref8","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Advances in Neural Information Processing Systems","author":"Brown"},{"article-title":"Rlaif: Scaling reinforcement learning from human feedback with ai feedback","year":"2023","author":"Lee","key":"ref9"},{"article-title":"Think you have solved question answering? try arc, the ai2 reasoning challenge","year":"2018","author":"Clark","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1472"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"},{"article-title":"OPTQ: Accurate quantization for generative pre-trained transformers","volume-title":"The Eleventh International Conference on Learning Representations","author":"Frantar","key":"ref13"},{"article-title":"AWQ: Activation-aware weight quantization for llm compression and acceleration","volume-title":"The Seventh Annual Conference on Machine Learning and Systems","author":"Lin","key":"ref14"},{"article-title":"A simple and effective pruning approach for large language models","year":"2023","author":"Sun","key":"ref15"},{"key":"ref16","first-page":"21702","article-title":"Llm-pruner: On the structural pruning of large language models","volume-title":"Advances in neural information processing systems","volume":"36","author":"Ma"},{"article-title":"Efficient streaming language models with attention sinks","volume-title":"The Twelfth International Conference on Learning Representations","author":"Xiao","key":"ref17"},{"article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"International Conference on Learning Representations","author":"Hu","key":"ref18"},{"key":"ref19","article-title":"Qlora: Efficient finetuning of quantized llms","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Dettmers"}],"event":{"name":"2024 IEEE 6th International Conference on AI Circuits and Systems (AICAS)","start":{"date-parts":[[2024,4,22]]},"location":"Abu Dhabi, United Arab Emirates","end":{"date-parts":[[2024,4,25]]}},"container-title":["2024 IEEE 6th International Conference on AI Circuits and Systems (AICAS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10595550\/10595552\/10595886.pdf?arnumber=10595886","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T17:48:38Z","timestamp":1760032118000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10595886\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,22]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/aicas59952.2024.10595886","relation":{},"subject":[],"published":{"date-parts":[[2024,4,22]]}}}