{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T17:52:41Z","timestamp":1761673961547,"version":"build-2065373602"},"reference-count":10,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62488101","62495104","62025404"],"award-info":[{"award-number":["62488101","62495104","62025404"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004739","name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004739","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Comput. Arch. Lett."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/lca.2025.3618104","type":"journal-article","created":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T17:37:03Z","timestamp":1759858623000},"page":"321-324","source":"Crossref","is-referenced-by-count":0,"title":["Low-Latency PIM Accelerator for Edge LLM Inference"],"prefix":"10.1109","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-2775-2068","authenticated-orcid":false,"given":"Xinyu","family":"Wang","sequence":"first","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8068-7024","authenticated-orcid":false,"given":"Xiaotian","family":"Sun","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3741-535X","authenticated-orcid":false,"given":"Wanqian","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0107-0196","authenticated-orcid":false,"given":"Feng","family":"Min","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2165-7151","authenticated-orcid":false,"given":"Xiaoyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Xinjiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0904-6681","authenticated-orcid":false,"given":"Yinhe","family":"Han","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7337-1844","authenticated-orcid":false,"given":"Xiaoming","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]}],"member":"263","reference":[{"article-title":"QServe: W4A8KV4 quantization and system co-design for efficient LLM serving","year":"2024","author":"Lin","key":"ref1"},{"article-title":"QQQ: Quality quattuor-bit quantization for large language models","year":"2024","author":"Zhang","key":"ref2"},{"year":"2025","key":"ref3","article-title":"Qwen2.5 technical report"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651324"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3695053.3731008"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2024.3457676"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TED.2024.3384133"},{"article-title":"Megatron-LM: Training multi-billion parameter language models using model parallelism","year":"2019","author":"Shoeybi","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/isscc42614.2022.9731694"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-41736-2"}],"container-title":["IEEE Computer Architecture Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10208\/11062520\/11194705.pdf?arnumber=11194705","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T17:32:49Z","timestamp":1761672769000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11194705\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":10,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/lca.2025.3618104","relation":{},"ISSN":["1556-6056","1556-6064","2473-2575"],"issn-type":[{"type":"print","value":"1556-6056"},{"type":"electronic","value":"1556-6064"},{"type":"electronic","value":"2473-2575"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}