{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T11:09:24Z","timestamp":1771672164726,"version":"3.50.1"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T00:00:00Z","timestamp":1771632000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T00:00:00Z","timestamp":1771632000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Codasip"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-026-08322-x","type":"journal-article","created":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T10:38:49Z","timestamp":1771670329000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Vectorized FlashAttention with low-cost exponential computation in RISC-V vector processors"],"prefix":"10.1007","volume":"82","author":[{"given":"Vasileios","family":"Titopoulos","sequence":"first","affiliation":[]},{"given":"Kosmas","family":"Alexandridis","sequence":"additional","affiliation":[]},{"given":"Giorgos","family":"Dimitrakopoulos","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,21]]},"reference":[{"issue":"140","key":"8322_CR1","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel C, Shazeer N, Roberts A, Lee K, Narang S, Matena M, Zhou Y, Li W, Liu PJ (2020) Exploring the limits of transfer learning with a unified text-to-text transformer. J Mach Learn Res 21(140):1\u201367","journal-title":"J Mach Learn Res"},{"key":"8322_CR2","doi-asserted-by":"crossref","unstructured":"Wen Q, Zhou T, Zhang C, Chen W, Ma Z, Yan J, Sun L (2022) Transformers in time series: a survey. arXiv preprint arXiv:2202.07125","DOI":"10.24963\/ijcai.2023\/759"},{"key":"8322_CR3","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser Lu, Polosukhin I (2017) Attention is all you need. In: International Conferences on Neural Information Processing Systems (NIPS), pp 6000\u20136010"},{"key":"8322_CR4","unstructured":"Beltagy I, Peters ME, Cohan A (2020) Longformer: the long-document transformer. arXiv preprint arXiv:2004.05150"},{"key":"8322_CR5","unstructured":"Child R, Gray S, Radford A, Sutskever I (2019) Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509"},{"key":"8322_CR6","unstructured":"Katharopoulos A, Vyas A, Pappas N, Fleuret F (2020) Transformers are rnns: fast autoregressive transformers with linear attention. In: International Conference on Machine Learning, pp 5156\u20135165. PMLR"},{"key":"8322_CR7","unstructured":"Wang S, Li BZ, Khabsa M, Fang H, Ma H (2020) Linformer: self-attention with linear complexity. arXiv preprint arXiv:2006.04768"},{"key":"8322_CR8","doi-asserted-by":"crossref","unstructured":"Ham TJ, Jung SJ, Kim S, Oh YH, Park Y, Song Y, Park J-H, Lee S, Park K, Lee JW et al (2020) A3: Accelerating attention mechanisms in neural networks with approximation. In: IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp 328\u2013341","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"8322_CR9","doi-asserted-by":"crossref","unstructured":"Jang H, Kim J, Jo J-E, Lee J, Kim J (2019) Mnnfast: a fast and scalable system architecture for memory-augmented neural networks. In: International Symposium on Computer Architecture (ISCA), pp 250\u2013263","DOI":"10.1145\/3307650.3322214"},{"key":"8322_CR10","doi-asserted-by":"crossref","unstructured":"Alexandridis K, Titopoulos V, Dimitrakopoulos G (2025) Low-cost flashattention with fused exponential and multiplication hardware operators. arXiv preprint arXiv:2505.14314","DOI":"10.1109\/ISVLSI65124.2025.11130263"},{"key":"8322_CR11","doi-asserted-by":"crossref","unstructured":"Wang R, Islamoglu G, Belano A, Potocnik V, Conti F, Garofalo A, Benini L (2025) Vexp: A low-cost risc-v isa extension for accelerated softmax computation in transformers. arXiv preprint arXiv:2504.11227","DOI":"10.1109\/ARITH64983.2025.00016"},{"issue":"2","key":"8322_CR12","doi-asserted-by":"publisher","first-page":"723","DOI":"10.1109\/TCAD.2024.3434447","volume":"44","author":"Z Wang","year":"2025","unstructured":"Wang Z, Wang G, He G (2025) COSA plus Enhanced co-operative systolic arrays for attention mechanism in transformers. IEEE Trans Comput-Aided Des Integrated Circuits Syst 44(2):723\u2013736","journal-title":"IEEE Trans Comput-Aided Des Integrated Circuits Syst"},{"key":"8322_CR13","doi-asserted-by":"crossref","unstructured":"Stevens JR et al (2021) Softermax: Hardware\/software co-design of an efficient softmax for transformers. In: ACM\/IEEE Design Automation Conference (DAC), pp 469\u2013474","DOI":"10.1109\/DAC18074.2021.9586134"},{"key":"8322_CR14","first-page":"16344","volume":"35","author":"T Dao","year":"2022","unstructured":"Dao T, Fu D, Ermon S, Rudra A, R\u00e9 C (2022) Flashattention: fast and memory-efficient exact attention with IO-awareness. Adv Neural Inf Process Syst 35:16344\u201316359","journal-title":"Adv Neural Inf Process Syst"},{"key":"8322_CR15","unstructured":"Dao T (2023) Flashattention-2: faster attention with better parallelism and work partitioning. arXiv preprint arXiv:2307.08691"},{"key":"8322_CR16","unstructured":"Rabe MN, Staats C (2021) Self-attention does not need $$O(n^2)$$ memory. arXiv preprint arXiv:2112.05682"},{"key":"8322_CR17","doi-asserted-by":"crossref","unstructured":"Binkert N et al (2011) The gem5 simulator. SIGARCH Comput Archit News 39(2)","DOI":"10.1145\/2024716.2024718"},{"key":"8322_CR18","unstructured":"Lowe-Power J, Ahmad AM, Akram A, Alian M, Amslinger R, Andreozzi M, Armejach A, Asmussen N, Beckmann B, Bharadwaj S et al (2020) The gem5 simulator: Version 20.0+. arXiv preprint arXiv:2007.03152"},{"key":"8322_CR19","doi-asserted-by":"crossref","unstructured":"Ham TJ, Lee Y, Seo SH, Kim S, Choi H, Jung SJ, Lee JW (2021) ELSA: Hardware-software co-design for efficient, lightweight self-attention mechanism in neural networks. In: International Symposium on Computer Architecture (ISCA), pp. 692\u2013705","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"8322_CR20","unstructured":"Milakov M, Gimelshein N (2018) Online normalizer calculation for softmax. arXiv preprint arXiv:1805.02867"},{"key":"8322_CR21","first-page":"8828","volume":"36","author":"H Liu","year":"2023","unstructured":"Liu H, Abbeel P (2023) Blockwise parallel transformers for large context models. Adv Neural Inf Process Syst 36:8828\u20138844","journal-title":"Adv Neural Inf Process Syst"},{"key":"8322_CR22","first-page":"68658","volume":"37","author":"J Shah","year":"2024","unstructured":"Shah J, Bikshandi G, Zhang Y, Thakkar V, Ramani P, Dao T (2024) Flashattention-3: fast and accurate attention with asynchrony and low-precision. Adv Neural Inf Process Syst 37:68658\u201368685","journal-title":"Adv Neural Inf Process Syst"},{"key":"8322_CR23","doi-asserted-by":"crossref","unstructured":"Srivastava N, Jin H, Liu J, Albonesi D, Zhang Z (2020) Matraptor: A sparse-sparse matrix multiplication accelerator based on row-wise product. In: IEEE International Symposium on Microarchitecture, pp 766\u2013780","DOI":"10.1109\/MICRO50266.2020.00068"},{"issue":"4","key":"8322_CR24","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1109\/38.595279","volume":"17","author":"JF Blinn","year":"1997","unstructured":"Blinn JF (1997) Floating-point tricks. IEEE Comput Graphics Appl 17(4):80\u201384","journal-title":"IEEE Comput Graphics Appl"},{"key":"8322_CR25","doi-asserted-by":"crossref","unstructured":"Koenig J et al (2017) A hardware accelerator for computing an exact dot product. In: IEEE Symposium on Computer Arithmetic (ARITH), pp 114\u2013121","DOI":"10.1109\/ARITH.2017.38"},{"key":"8322_CR26","doi-asserted-by":"crossref","unstructured":"Ta T, Al-Hawaj K, Cebry N, Ou Y, Hall E, Golden C, Batten C (2022) big.VLITTLE: On-demand data-parallel acceleration for mobile systems on chip. In: IEEE International Symposium on Microarch (MICRO), pp 181\u2013198","DOI":"10.1109\/MICRO56248.2022.00025"},{"key":"8322_CR27","doi-asserted-by":"crossref","unstructured":"Wolf T et al (2020) Transformers: State-of-the-art natural language processing. In: Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp 38\u201345","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"issue":"4","key":"8322_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3422667","volume":"17","author":"C Ram\u00edrez","year":"2020","unstructured":"Ram\u00edrez C, Hern\u00e1ndez CA, Palomar O, Unsal O, Ram\u00edrez MA, Cristal A (2020) A risc-v simulator and benchmark suite for designing and evaluating vector architectures. ACM Trans Architect Code Optim 17(4):1\u201330","journal-title":"ACM Trans Architect Code Optim"},{"key":"8322_CR29","doi-asserted-by":"crossref","unstructured":"Bavier E, Knight N, Lassus Saint-Geni\u00e8s H, Love E (2023) Vectorized nonlinear functions with the risc-v vector extension. In: IEEE Symposium on Computer Arithmetic (ARITH), pp 127\u2013130","DOI":"10.1109\/ARITH58626.2023.00032"},{"key":"8322_CR30","doi-asserted-by":"crossref","unstructured":"Wang A, Singh A, Michael J, Hill F, Levy O, Bowman SR (2018) Glue: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461","DOI":"10.18653\/v1\/W18-5446"},{"key":"8322_CR31","unstructured":"Zhu K et al (2023) Promptbench: A unified library for evaluation of large language models. arXiv preprint arXiv:2312.07910"},{"key":"8322_CR32","unstructured":"Kim S et al (2021) I-bert: Integer-only bert quantization. In: International Conference on Machine Learning, pp 5506\u20135518. PMLR"},{"key":"8322_CR33","doi-asserted-by":"publisher","first-page":"512","DOI":"10.1109\/TEC.1962.5219391","volume":"4","author":"JN Mitchell","year":"1962","unstructured":"Mitchell JN (1962) Computer multiplication and division using binary logarithms. IRE Trans Electr Comput 4:512\u2013517","journal-title":"IRE Trans Electr Comput"},{"key":"8322_CR34","doi-asserted-by":"crossref","unstructured":"Chen Z, Qu Z, Quan Y, Liu L, Ding Y, Xie Y (2023) Dynamic n:m fine-grained structured sparse attention mechanism. In: ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp 369\u2013379","DOI":"10.1145\/3572848.3577500"},{"issue":"4","key":"8322_CR35","doi-asserted-by":"publisher","first-page":"1129","DOI":"10.1109\/JSSC.2023.3234893","volume":"58","author":"B Keller","year":"2023","unstructured":"Keller B, Venkatesan R, Dai S, Tell SG, Zimmer B, Sakr C, Dally WJ, Gray CT, Khailany B (2023) A 95.6-TOPS\/W deep learning inference accelerator with per-vector scaled 4-bit quantization in 5 nm. IEEE J Solid State Circuits 58(4):1129\u20131141","journal-title":"IEEE J Solid State Circuits"},{"key":"8322_CR36","doi-asserted-by":"crossref","unstructured":"Lu S, Wang M, Liang S, Lin J, Wang Z (2020) Hardware accelerator for multi-head attention and position-wise feed-forward in the transformer. In: IEEE International System-on-Chip Conference (SOCC), pp 84\u201389","DOI":"10.1109\/SOCC49529.2020.9524802"},{"issue":"8","key":"8322_CR37","doi-asserted-by":"publisher","first-page":"1223","DOI":"10.1109\/TVLSI.2023.3282046","volume":"31","author":"S Sridharan","year":"2023","unstructured":"Sridharan S, Stevens JR, Roy K, Raghunathan A (2023) X-former: In-memory acceleration of transformers. IEEE Trans Very Large Scale Integr Syst 31(8):1223\u20131233","journal-title":"IEEE Trans Very Large Scale Integr Syst"},{"key":"8322_CR38","doi-asserted-by":"crossref","unstructured":"Koca NA, Do AT, Chang C-H (2023) Hardware-efficient softmax approximation for self-attention networks. In: International Symposium on Circuits and Systems (ISCAS), pp 1\u20135","DOI":"10.1109\/ISCAS46773.2023.10181465"},{"key":"8322_CR39","doi-asserted-by":"crossref","unstructured":"Wang W, Zhou S, Sun W, Sun P, Liu Y (2023) SOLE: hardware-software co-design of softmax and layernorm for efficient transformer inference. In: IEEE\/ACM International Conference on Computer Aided Design (ICCAD), pp 1\u20139","DOI":"10.1109\/ICCAD57390.2023.10323725"},{"key":"8322_CR40","doi-asserted-by":"crossref","unstructured":"Sadeghi ME, Fayyazi A, Azizi S, Pedram M (2024) Peano-vit: Power-efficient approximations of non-linearities in vision transformers. In: ACM\/IEEE International Symposium on Low Power Electronics and Design (ISLPED), pp 1\u20136","DOI":"10.1145\/3665314.3670843"},{"key":"8322_CR41","doi-asserted-by":"crossref","unstructured":"Song Z, Qi C, Yao Y, Zhou P, Zi Y, Wang N, Liang X (2024) TSAcc: An efficient tempo-spatial similarity aware accelerator for attention acceleration. In: ACM\/IEEE Design Automation Conference","DOI":"10.1145\/3649329.3655982"},{"key":"8322_CR42","doi-asserted-by":"crossref","unstructured":"Marchisio A, Dura D, Capra M, Martina M, Masera G, Shafique M (2023) Swifttron: An efficient hardware accelerator for quantized transformers. In: International Joint Conference on Neural Networks (IJCNN), pp 1\u20139","DOI":"10.1109\/IJCNN54540.2023.10191521"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08322-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-026-08322-x","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08322-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T10:38:56Z","timestamp":1771670336000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-026-08322-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,21]]},"references-count":42,"journal-issue":{"issue":"4","published-online":{"date-parts":[[2026,3]]}},"alternative-id":["8322"],"URL":"https:\/\/doi.org\/10.1007\/s11227-026-08322-x","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,21]]},"assertion":[{"value":"22 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"189"}}