{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:53:31Z","timestamp":1775066011049,"version":"3.50.1"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T00:00:00Z","timestamp":1751760000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T00:00:00Z","timestamp":1751760000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004830","name":"Siemens","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004830","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100019906","name":"Infineon Technologies","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100019906","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,6]]},"DOI":"10.1109\/isvlsi65124.2025.11130263","type":"proceedings-article","created":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T18:20:15Z","timestamp":1756318815000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["Low-Cost FlashAttention with Fused Exponential and Multiplication Hardware Operators"],"prefix":"10.1109","author":[{"given":"Kosmas","family":"Alexandridis","sequence":"first","affiliation":[{"name":"Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece"}]},{"given":"Vasileios","family":"Titopoulos","sequence":"additional","affiliation":[{"name":"Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece"}]},{"given":"Giorgos","family":"Dimitrakopoulos","sequence":"additional","affiliation":[{"name":"Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece"}]}],"member":"263","reference":[{"issue":"140","key":"ref1","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"Journal of Machine Learning Research"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/759"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref4","article-title":"Longformer: The longdocument transformer","author":"Beltagy","year":"2020","journal-title":"arXiv preprint arXiv:2004.05150"},{"key":"ref5","article-title":"Generating long sequences with sparse transformers","author":"Child","year":"2019","journal-title":"arXiv preprint arXiv:1904.10509"},{"key":"ref6","first-page":"5156","article-title":"Transformers are rnns: Fast autoregressive transformers with linear attention","volume-title":"Intern. conference on machine learning","author":"Katharopoulos"},{"key":"ref7","article-title":"Linformer: Self-attention with linear complexity","author":"Wang","year":"2020","journal-title":"arXiv preprint arXiv:2006.04768"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/hpca47549.2020.00035"},{"key":"ref9","first-page":"250","article-title":"Mnnfast: a fast and scalable system architecture for memory-augmented neural networks","volume-title":"Intern. Symp. on Computer Architecture (ISCA)","volume":"2019","author":"Jang"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2024.3434447"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586134"},{"key":"ref12","first-page":"16344","article-title":"Flashattention: Fast and memory-efficient exact attention with IO-awareness","volume":"35","author":"Dao","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref13","article-title":"Flashattention-2: Faster attention with better parallelism and work partitioning","author":"Dao","year":"2023","journal-title":"arXiv preprint arXiv:2307.08691"},{"key":"ref14","article-title":"Self-attention does not need Oleft(n^2right) memory","author":"Rabe","year":"2021","journal-title":"arXiv preprint arXiv:2112.05682"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2023.3234893"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/socc49529.2020.9524802"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2023.3282046"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655982"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191521"},{"key":"ref23","article-title":"Online normalizer calculation for softmax","author":"Milakov","year":"2018","journal-title":"arXiv preprint arXiv:1805.02867"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2024.3488966"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/iscas46773.2023.10181465"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD57390.2023.10323725"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3665314.3670843"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2017.38"},{"key":"ref29","article-title":"A study of bfloat16 for deep learning training","author":"Kalamkar","year":"2019","journal-title":"arXiv preprint arXiv:1905.12322"},{"key":"ref30","article-title":"Promptbench: A unified library for evaluation of large language models","author":"Zhu","year":"2023","journal-title":"arXiv preprint arXiv:2312.07910"},{"key":"ref31","first-page":"5506","article-title":"I-bert: Integer-only bert quantization","volume-title":"International conference on machine learning. PMLR","author":"Kim"}],"event":{"name":"2025 IEEE Computer Society Annual Symposium on VLSI (ISVLSI)","location":"Kalamata, Greece","start":{"date-parts":[[2025,7,6]]},"end":{"date-parts":[[2025,7,9]]}},"container-title":["2025 IEEE Computer Society Annual Symposium on VLSI (ISVLSI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11129697\/11130193\/11130263.pdf?arnumber=11130263","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,28]],"date-time":"2025-08-28T04:32:00Z","timestamp":1756355520000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11130263\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,6]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/isvlsi65124.2025.11130263","relation":{},"subject":[],"published":{"date-parts":[[2025,7,6]]}}}