{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T19:14:11Z","timestamp":1764184451463,"version":"3.46.0"},"reference-count":21,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFB4403103"],"award-info":[{"award-number":["2023YFB4403103"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. II"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1109\/tcsii.2025.3580135","type":"journal-article","created":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T13:44:16Z","timestamp":1750167856000},"page":"1073-1077","source":"Crossref","is-referenced-by-count":0,"title":["KV-Cache Oriented Query-Aware Sparse Attention Accelerator With Cross-Stage Precision-Configurable Digital CIM"],"prefix":"10.1109","volume":"72","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-1455-5749","authenticated-orcid":false,"given":"Yang","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3853-2398","authenticated-orcid":false,"given":"Xilong","family":"Kang","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4835-5720","authenticated-orcid":false,"given":"Weixuan","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yizhi","family":"Ding","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lizheng","family":"Ren","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7903-4623","authenticated-orcid":false,"given":"Yiran","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0148-396X","authenticated-orcid":false,"given":"Ruiqi","family":"Tan","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7821-2252","authenticated-orcid":false,"given":"Zhen","family":"Wang","sequence":"additional","affiliation":[{"name":"Nanjing Prochip Electronic Technology Co. Ltd., Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9794-8049","authenticated-orcid":false,"given":"Hao","family":"Cai","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0894-1054","authenticated-orcid":false,"given":"Bo","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"33","author":"Brown"},{"key":"ref3","article-title":"LLAMA 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv:2307.09288"},{"key":"ref4","first-page":"606","article-title":"Efficiently scaling transformer inference","volume-title":"Proc. Mach. Learn. Syst.","volume":"5","author":"Pope"},{"key":"ref5","first-page":"16344","article-title":"FlashAttention: Fast and memory-efficient exact attention with IO-awareness","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"35","author":"Dao"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247993"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655936"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/dac56929.2023.10247678"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589057"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC49657.2024.10454308"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/isscc42615.2023.10067842"},{"key":"ref13","article-title":"Quest: Query-aware sparsity for efficient long-context LLM inference","author":"Tang","year":"2024","journal-title":"arXiv:2406.10774"},{"key":"ref14","article-title":"MICSim: A modular simulator for mixed-signal compute-in-memory based AI accelerator","author":"Wang","year":"2024","journal-title":"arXiv:2409.14838"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2024.3434706"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/DAC.2018.8465702"},{"key":"ref17","first-page":"10323","article-title":"SparseGPT: Massive language models can be accurately pruned in one-shot","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Frantar"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i18.30049"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2023.3315060"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42615.2023.10067360"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655690"}],"container-title":["IEEE Transactions on Circuits and Systems II: Express Briefs"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8920\/11104806\/11037450.pdf?arnumber=11037450","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T19:07:18Z","timestamp":1764184038000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11037450\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8]]},"references-count":21,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tcsii.2025.3580135","relation":{},"ISSN":["1549-7747","1558-3791"],"issn-type":[{"type":"print","value":"1549-7747"},{"type":"electronic","value":"1558-3791"}],"subject":[],"published":{"date-parts":[[2025,8]]}}}