{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T21:19:09Z","timestamp":1776719949863,"version":"3.51.2"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Korean Government [Ministry of Science and ICT (MSIT)]","award":["RS-2026-02214322"],"award-info":[{"award-number":["RS-2026-02214322"]}]},{"DOI":"10.13039\/501100010418","name":"Institute of Information and Communications Technology Planning and Evaluation (IITP)-Information Technology Research Center","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Korean Government","award":["IITP-2026-RS-2022-00156295"],"award-info":[{"award-number":["IITP-2026-RS-2022-00156295"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/access.2026.3680126","type":"journal-article","created":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T20:00:05Z","timestamp":1775851205000},"page":"56679-56693","source":"Crossref","is-referenced-by-count":0,"title":["QubitCache: Quantum-Inspired Probabilistic Attention Preservation for KV-Cache Compression"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7691-0930","authenticated-orcid":false,"given":"Jieui","family":"Kang","sequence":"first","affiliation":[{"name":"Artificial Intelligence Convergence, Ewha Womans University, Seoul, South Korea"}]},{"given":"Jaeyoung","family":"Choi","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3936-2896","authenticated-orcid":false,"given":"Wonhui","family":"Noh","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8722-8486","authenticated-orcid":false,"given":"Jaehyeong","family":"Sim","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1189"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1506"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2279"},{"key":"ref6","article-title":"SnapKV: LLM knows what you are looking for before generation","author":"Li","year":"2024","journal-title":"arXiv:2404.14469"},{"key":"ref7","article-title":"PyramidKV: Dynamic KV cache compression based on pyramidal information funneling","author":"Cai","year":"2024","journal-title":"arXiv:2406.02069"},{"key":"ref8","first-page":"18506","article-title":"MiniKV: Pushing the limits of LLM inference via 2-bit layer-discriminative KV cache","volume-title":"Proc. Findings Assoc. Comput. Linguistics","author":"Sharma"},{"key":"ref9","article-title":"Compactor: Calibrated query-agnostic KV cache compression with approximate leverage scores","author":"Chari","year":"2025","journal-title":"arXiv:2507.08143"},{"key":"ref10","article-title":"GEAR: An efficient KV cache compression recipe for near-lossless generative inference of LLM","author":"Kang","year":"2024","journal-title":"arXiv:2403.05527"},{"key":"ref11","article-title":"Efficient streaming language models with attention sinks","author":"Xiao","year":"2023","journal-title":"arXiv:2309.17453"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.550"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.580"},{"key":"ref14","first-page":"14014","article-title":"Are sixteen heads really better than one?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Michel"},{"key":"ref15","article-title":"Rethinking attention with performers","author":"Choromanski","year":"2020","journal-title":"arXiv:2009.14794"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1017\/cbo9780511976667"},{"key":"ref17","article-title":"RULER: What\u2019s the real context size of your long-context language models?","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process.","author":"Hsieh"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0040"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3725338"},{"key":"ref20","article-title":"RocketKV: Accelerating long-context LLM inference via two-stage KV cache compression","author":"Behnam","year":"2025","journal-title":"arXiv:2502.14051"},{"key":"ref21","article-title":"KVzip: Query-agnostic KV cache compression with context reconstruction","author":"Kim","year":"2025","journal-title":"arXiv:2505.23416"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.52202\/068431-2126"},{"key":"ref23","article-title":"DeltaKV: Residual-based KV cache compression via long-range similarity","author":"Hao","year":"2026","journal-title":"arXiv:2602.08005"},{"key":"ref24","article-title":"KVCompose: Efficient structured KV cache compression with composite tokens","author":"Akulov","year":"2025","journal-title":"arXiv:2509.05165"},{"key":"ref25","volume-title":"A Wavelet Tour of Signal Processing","author":"Stephane","year":"1999"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176348766"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21236\/ADA238689"},{"key":"ref28","first-page":"9895","article-title":"Sparse is enough in scaling transformers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Jaszczur"},{"key":"ref29","first-page":"17283","article-title":"Big bird: Transformers for longer sequences","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Zaheer"},{"key":"ref30","article-title":"Mistral 7B","author":"Jiang","year":"2023","journal-title":"arXiv:2310.06825"},{"key":"ref31","article-title":"DySCO: Dynamically scaled computation offloading for efficient long-context LLM inference","author":"Ye","year":"2026","journal-title":"arXiv:2602.22175"},{"key":"ref32","article-title":"LongBench: A bilingual, multitask benchmark for long context understanding","author":"Bai","year":"2023","journal-title":"arXiv:2308.14508"},{"key":"ref33","article-title":"World model on million-length video and language with blockwise RingAttention","author":"Liu","year":"2024","journal-title":"arXiv:2402.08268"},{"key":"ref34","article-title":"How long can context length of open-source LLMs truly promise?","volume-title":"Proc. NeurIPS Workshop Instruct. Tuning Instruct. Following","author":"Li"},{"key":"ref35","article-title":"Mixtral of experts","author":"Jiang","year":"2024","journal-title":"arXiv:2401.04088"},{"key":"ref36","article-title":"Quantum computing with qiskit","author":"Javadi-Abhari","year":"2024","journal-title":"arXiv:2405.08810"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.385"},{"key":"ref38","first-page":"21618","article-title":"Rethinking graph transformers with spectral attention","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Kreuzer"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-4828"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/11323511\/11479587.pdf?arnumber=11479587","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T20:06:22Z","timestamp":1776715582000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11479587\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/access.2026.3680126","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}