{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T22:00:29Z","timestamp":1780783229424,"version":"3.54.1"},"reference-count":36,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Integration"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.vlsi.2026.102784","type":"journal-article","created":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T06:36:21Z","timestamp":1780641381000},"page":"102784","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["ALISTA: Accelerator using LSH-based maximum inner-product search in transformer attention"],"prefix":"10.1016","volume":"110","author":[{"given":"Shine Parekkadan","family":"Sunny","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Satyajit","family":"Das","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.vlsi.2026.102784_b1","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"10.1016\/j.vlsi.2026.102784_b2","series-title":"Language models are unsupervised multitask learners","author":"Radford","year":"2019"},{"key":"10.1016\/j.vlsi.2026.102784_b3","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019","journal-title":"Proc. NAACL-HLT"},{"key":"10.1016\/j.vlsi.2026.102784_b4","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"140","key":"10.1016\/j.vlsi.2026.102784_b5","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.vlsi.2026.102784_b6","series-title":"On the opportunities and risks of foundation models","author":"Bommasani","year":"2021"},{"key":"10.1016\/j.vlsi.2026.102784_b7","series-title":"Linformer: Self-attention with linear complexity","author":"Wang","year":"2020"},{"key":"10.1016\/j.vlsi.2026.102784_b8","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v35i16.17664","article-title":"Nystr\u00f6mformer: A Nystr\u00f6m-based algorithm for approximating self-attention","author":"Xiong","year":"2021","journal-title":"Proc. AAAI"},{"key":"10.1016\/j.vlsi.2026.102784_b9","article-title":"FNet: Mixing tokens with Fourier transforms","author":"Lee-Thorp","year":"2022","journal-title":"Proc. NAACL-HLT"},{"key":"10.1016\/j.vlsi.2026.102784_b10","series-title":"SPECTRE: An FFT-based efficient drop-in replacement to self-attention for long contexts","author":"Fein-Ashley","year":"2025"},{"key":"10.1016\/j.vlsi.2026.102784_b11","article-title":"Big bird: Transformers for longer sequences","volume":"33","author":"Zaheer","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"10.1016\/j.vlsi.2026.102784_b12","series-title":"Longformer: The long-document transformer","author":"Beltagy","year":"2020"},{"key":"10.1016\/j.vlsi.2026.102784_b13","article-title":"Reformer: The efficient transformer","author":"Kitaev","year":"2020","journal-title":"Int. Conf. Learn. Represent. (ICLR)"},{"key":"10.1016\/j.vlsi.2026.102784_b14","article-title":"Long range arena: A benchmark for efficient transformers","author":"Tay","year":"2021","journal-title":"Int. Conf. Learn. Represent. (ICLR)"},{"key":"10.1016\/j.vlsi.2026.102784_b15","article-title":"Are sixteen heads really better than one?","volume":"32","author":"Michel","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"10.1016\/j.vlsi.2026.102784_b16","article-title":"FAMOUS: Flexible accelerator for multi-head self-attention","author":"Zhang","year":"2022","journal-title":"IEEE Trans. Comput.-Aided Des. Integr. Circuits Syst."},{"key":"10.1016\/j.vlsi.2026.102784_b17","unstructured":"X. Zhang, Y. Liu, Y. Chen, A3: Accelerating Attention Mechanisms with Approximation, in: Proceedings of the 54th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO, 2021."},{"issue":"12","key":"10.1016\/j.vlsi.2026.102784_b18","doi-asserted-by":"crossref","first-page":"2295","DOI":"10.1109\/JPROC.2017.2761740","article-title":"Efficient processing of deep neural networks: A tutorial and survey","volume":"105","author":"Sze","year":"2017","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.vlsi.2026.102784_b19","series-title":"Proceedings of the 29th International Conference on Architectural Support for Programming Languages and Operating Systems","article-title":"JUNO: Optimizing high-dimensional approximate nearest neighbour search with sparsity-aware algorithm and ray-tracing core mapping","author":"Liu","year":"2024"},{"key":"10.1016\/j.vlsi.2026.102784_b20","series-title":"Proceedings of the 42nd International Conference on Machine Learning","article-title":"HashAttention: Semantic sparsity for faster inference","author":"Desai","year":"2025"},{"issue":"4","key":"10.1016\/j.vlsi.2026.102784_b21","first-page":"1","article-title":"JUNO++: Optimizing ANNS and enabling efficient sparse attention in LLM via ray tracing core","volume":"22","author":"Liu","year":"2025","journal-title":"ACM Trans. Archit. Code Optim."},{"key":"10.1016\/j.vlsi.2026.102784_b22","article-title":"H2O: Heavy-hitter oracle for efficient generative inference of large language models","volume":"36","author":"Zhang","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.vlsi.2026.102784_b23","series-title":"Efficient streaming language models with attention sinks","author":"Xiao","year":"2023"},{"key":"10.1016\/j.vlsi.2026.102784_b24","doi-asserted-by":"crossref","unstructured":"T.J. Ham, et al., ELSA: Hardware-Software Co-design for Efficient, Lightweight Self-Attention, in: International Symposium on Computer Architecture, ISCA, 2021.","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"10.1016\/j.vlsi.2026.102784_b25","doi-asserted-by":"crossref","unstructured":"J. Dass, S. Wu, H. Shi, C. Li, Z. Ye, Z. Wang, Y. Lin, ViTALiTy: Unifying Low-rank and Sparse Approximation for Vision Transformer Acceleration with a Linear Taylor Attention, in: IEEE International Symposium on High Performance Computer Architecture, HPCA, 2023.","DOI":"10.1109\/HPCA56546.2023.10071081"},{"key":"10.1016\/j.vlsi.2026.102784_b26","article-title":"DOTA: Detect and omit weak attentions for scalable transformer acceleration","author":"of DOTA","year":"2022","journal-title":"IEEE Trans. Comput."},{"key":"10.1016\/j.vlsi.2026.102784_b27","doi-asserted-by":"crossref","unstructured":"P. Indyk, R. Motwani, Approximate Nearest Neighbors: Towards Removing the Curse of Dimensionality, in: Proceedings of the Thirtieth Annual ACM Symposium on Theory of Computing, STOC, 1998.","DOI":"10.1145\/276698.276876"},{"key":"10.1016\/j.vlsi.2026.102784_b28","doi-asserted-by":"crossref","unstructured":"M. Charikar, Similarity Estimation Techniques from Rounding Algorithms, in: Proceedings of the 34th Annual ACM Symposium on Theory of Computing, 2002.","DOI":"10.1145\/509907.509965"},{"key":"10.1016\/j.vlsi.2026.102784_b29","article-title":"Asymmetric LSH for maximum inner product search","volume":"27","author":"Shrivastava","year":"2014","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"10.1016\/j.vlsi.2026.102784_b30","article-title":"On the power of asymmetric locality sensitive hashing for maximum inner product search","volume":"28","author":"Neyshabur","year":"2015","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"10.1016\/j.vlsi.2026.102784_b31","article-title":"Norm-ranging LSH for maximum inner product search","volume":"31","author":"Yan","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"10.1016\/j.vlsi.2026.102784_b32","series-title":"Predicting attention sparsity in transformers","author":"Treviso","year":"2022"},{"key":"10.1016\/j.vlsi.2026.102784_b33","series-title":"Sparsity patterns in transformer attention","author":"Farina","year":"2024"},{"key":"10.1016\/j.vlsi.2026.102784_b34","series-title":"Regularizing attention via dominant interaction selection","author":"Gandhi","year":"2024"},{"issue":"1","key":"10.1016\/j.vlsi.2026.102784_b35","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1109\/MC.1982.1653825","article-title":"Why systolic architectures?","volume":"15","author":"Kung","year":"1982","journal-title":"Computer"},{"issue":"9","key":"10.1016\/j.vlsi.2026.102784_b36","first-page":"1154","article-title":"Hexagonal systolic arrays for matrix multiplication","volume":"41","author":"Hsu","year":"1992","journal-title":"IEEE Trans. Comput."}],"container-title":["Integration"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167926026001392?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167926026001392?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T21:55:25Z","timestamp":1780782925000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167926026001392"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":36,"alternative-id":["S0167926026001392"],"URL":"https:\/\/doi.org\/10.1016\/j.vlsi.2026.102784","relation":{},"ISSN":["0167-9260"],"issn-type":[{"value":"0167-9260","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"ALISTA: Accelerator using LSH-based maximum inner-product search in transformer attention","name":"articletitle","label":"Article Title"},{"value":"Integration","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.vlsi.2026.102784","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"102784"}}