{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T04:05:31Z","timestamp":1747973131227,"version":"3.41.0"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003347","name":"Fudan University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003347","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.23919\/date64628.2025.10992963","type":"proceedings-article","created":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T17:36:35Z","timestamp":1747848995000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["AttentionLib: A Scalable Optimization Framework for Automated Attention Acceleration on FPGA"],"prefix":"10.23919","author":[{"given":"Zhenyu","family":"Liu","sequence":"first","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems,Shanghai,China"}]},{"given":"Xilang","family":"Zhou","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems,Shanghai,China"}]},{"given":"Faxian","family":"Sun","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems,Shanghai,China"}]},{"given":"Jianli","family":"Chen","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems,Shanghai,China"}]},{"given":"Jun","family":"Yu","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems,Shanghai,China"}]},{"given":"Kun","family":"Wang","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems,Shanghai,China"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1145\/2749469.2750389"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1145\/3007787.3001177"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1145\/3079856.3080246"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.48550\/ARXIV.1706.03762"},{"year":"2019","author":"Devlin","journal-title":"BERT: Pre-Training of Deep Bidirectional Transformers for Language Understanding","key":"ref5"},{"year":"2020","author":"Brown","journal-title":"Language Models Are Few-Shot Learners","key":"ref6"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/MM.2020.2985963"},{"year":"2021","author":"Dosovitskiy","journal-title":"An Image Is Worth 16x16 Words: Transformers for Image Recognition at Scale","key":"ref8"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/DAC18074.2021.9586329"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/CGO51591.2021.9370308"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/ISCA52012.2021.00062"},{"key":"ref12","doi-asserted-by":"crossref","DOI":"10.1145\/3508352.3549424","article-title":"An MLIR-Based Compiler Flow for System-Level Design and Hardware Acceleration","volume-title":"Proceedings of the 41st IEEE\/ACM International Conference on Computer-Aided Design","author":"Bohm Agostini","year":"2022"},{"year":"2022","author":"Dao","journal-title":"FlashAttention: Fast and Memory-Efficient Exact Attention with IO- Awareness","key":"ref13"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/HPCA53966.2022.00060"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/TKDE.2021.3126456"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1145\/3575693.3575747"},{"year":"2023","author":"Touvron","journal-title":"LLaMA: Open and Efficient Foundation Language Models","key":"ref17"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.1145\/3613424.3623792","article-title":"TileFlow: A Framework for Modeling Fusion Dataflow via Tree-Based Analysis","volume-title":"56th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Zheng","year":"2023"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1145\/3676536.3676749"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/ISCA59077.2024.00019"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1145\/3617232.3624850"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/HPCA57654.2024.00017"},{"year":"2024","author":"Zhong","journal-title":"DistServe: Disaggregating Prefill and Decoding for Goodput-Optimized Large Language Model Serving","key":"ref23"}],"event":{"name":"2025 Design, Automation &amp; Test in Europe Conference (DATE)","start":{"date-parts":[[2025,3,31]]},"location":"Lyon, France","end":{"date-parts":[[2025,4,2]]}},"container-title":["2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10992638\/10992588\/10992963.pdf?arnumber=10992963","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T06:07:29Z","timestamp":1747894049000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10992963\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":23,"URL":"https:\/\/doi.org\/10.23919\/date64628.2025.10992963","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]}}}