{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T05:59:34Z","timestamp":1781157574217,"version":"3.54.1"},"reference-count":4,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T00:00:00Z","timestamp":1778630400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T00:00:00Z","timestamp":1778630400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,5,13]]},"DOI":"10.1109\/fccm68464.2026.00050","type":"proceedings-article","created":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T19:59:45Z","timestamp":1781121585000},"page":"262-262","source":"Crossref","is-referenced-by-count":0,"title":["STEEL: Sparsity-Aware Fused Attention for Energy-Efficient Long-Sequence Inference on AMD\u2019s XDNA\u2122 NPU"],"prefix":"10.1109","author":[{"given":"Victor J.B.","family":"Jung","sequence":"first","affiliation":[{"name":"ETH Z&#x00FC;rich,Integrated Systems Laboratory (IIS),Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gagandeep","family":"Singh","sequence":"additional","affiliation":[{"name":"AMD Research and Advanced Development (RAD)"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Joseph","family":"Melber","sequence":"additional","affiliation":[{"name":"AMD Research and Advanced Development (RAD)"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kristof","family":"Denolf","sequence":"additional","affiliation":[{"name":"AMD Research and Advanced Development (RAD)"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Francesco","family":"Conti","sequence":"additional","affiliation":[{"name":"University of Bologna,Department of Electrical, Electronic and Information Engineering (DEI),Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Luca","family":"Benini","sequence":"additional","affiliation":[{"name":"ETH Z&#x00FC;rich,Integrated Systems Laboratory (IIS),Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Dato: A Task-Based Programming Model for Dataflow Accelerators","author":"Fang","year":"2025"},{"key":"ref2","article-title":"Intelligence per Watt: Measuring Intelligence Efficiency of Local AI","author":"Saad-Falcon","year":"2025"},{"key":"ref3","article-title":"FlashAttention-2: Faster Attention with Better Parallelism and Work Partitioning","author":"Dao","year":"2023"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED58423.2023.10244348"}],"event":{"name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","location":"Atlanta, GA, USA","start":{"date-parts":[[2026,5,13]]},"end":{"date-parts":[[2026,5,16]]}},"container-title":["2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11552597\/11552602\/11552708.pdf?arnumber=11552708","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T05:25:32Z","timestamp":1781155532000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11552708\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,13]]},"references-count":4,"URL":"https:\/\/doi.org\/10.1109\/fccm68464.2026.00050","relation":{},"subject":[],"published":{"date-parts":[[2026,5,13]]}}}