{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T14:33:20Z","timestamp":1777127600428,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,5]],"date-time":"2024-08-05T00:00:00Z","timestamp":1722816000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"PRISM","award":["367154"],"award-info":[{"award-number":["367154"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,5]]},"DOI":"10.1145\/3665314.3670798","type":"proceedings-article","created":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T19:31:18Z","timestamp":1725910278000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Efficient Transformer Acceleration via Reconfiguration for Encoder and Decoder Models and Sparsity-Aware Algorithm Mapping"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-4235-6243","authenticated-orcid":false,"given":"Chang Eun","family":"Song","sequence":"first","affiliation":[{"name":"Computer Science and Engineering, University of California, San Diego, La Jolla, CA, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5112-300X","authenticated-orcid":false,"given":"Ashkan","family":"Moradifirouzabadi","sequence":"additional","affiliation":[{"name":"University of California, San Diego, La Jolla, CA, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6954-997X","authenticated-orcid":false,"given":"Tajana","family":"Rosing","sequence":"additional","affiliation":[{"name":"University of California, San Diego, La Jolla, CA, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6889-4010","authenticated-orcid":false,"given":"Mingu","family":"Kang","sequence":"additional","affiliation":[{"name":"University of California San Diego, La Jolla, CA, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,9,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"NVIDIA Deep Learning Accelerator (NVDLA) 2017."},{"key":"e_1_3_2_1_2_1","unstructured":"ARM Trillion 2018."},{"key":"e_1_3_2_1_3_1","volume-title":"https:\/\/developer.arm.com\/ip-products\/physical-ip\/embedded-memory","author":"Artisan Memory Compilers ARM.","year":"2021","unstructured":"ARM. Artisan Memory Compilers. https:\/\/developer.arm.com\/ip-products\/physical-ip\/embedded-memory, 2021. Accessed: 2021-11-08."},{"key":"e_1_3_2_1_4_1","volume-title":"Language Models are Few-shot Learners","author":"Tom Brown","year":"2020","unstructured":"Tom Brown et al. Language Models are Few-shot Learners. 2020."},{"key":"e_1_3_2_1_5_1","volume-title":"Fine-Tune BERT with Sparse Self-Attention Mechanism. In EMNLP-IJCNLP","author":"Baiyun","year":"2019","unstructured":"Baiyun Cui et al. Fine-Tune BERT with Sparse Self-Attention Mechanism. In EMNLP-IJCNLP, 2019."},{"key":"e_1_3_2_1_6_1","first-page":"1","volume-title":"2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA)","author":"Jeremy","year":"2018","unstructured":"Jeremy Fowers et al. A configurable cloud-scale dnn processor for real-time ai. In 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA), pages 1--14. IEEE, 2018."},{"key":"e_1_3_2_1_7_1","volume-title":"HPCA","author":"Jun Tae","year":"2020","unstructured":"Tae Jun Ham et al. A3: Accelerating Attention Mechanisms in Neural Networks with Approximation. In HPCA, 2020."},{"key":"e_1_3_2_1_8_1","volume-title":"Lightweight Self-Attention Mechanism in Neural Networks. In ISCA","author":"Jun Tae","year":"2021","unstructured":"Tae Jun Ham et al. ELSA: Hardware-Software Co-design for Efficient, Lightweight Self-Attention Mechanism in Neural Networks. In ISCA, 2021."},{"key":"e_1_3_2_1_9_1","first-page":"1","volume-title":"ISCA","author":"Norman","year":"2017","unstructured":"Norman P Jouppi et al. In-datacenter performance analysis of a tensor processing unit. In ISCA, pages 1--12, 2017."},{"key":"e_1_3_2_1_10_1","volume-title":"ICML","author":"Angelos","year":"2020","unstructured":"Angelos Katharopoulos et al. Transformers are RNNs: Fast AutoRegressive Transformers with Linear Attention. In ICML, 2020."},{"key":"e_1_3_2_1_11_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT","author":"Ming-Wei Chang Jacob Devlin","year":"2019","unstructured":"Jacob Devlin Ming-Wei Chang Kenton et al. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT, 2019."},{"key":"e_1_3_2_1_12_1","volume-title":"ISCA","author":"Zheng","year":"2022","unstructured":"Zheng Li et al. Accelerating Attention through Gradient-Based Learned Runtime Pruning. In ISCA, 2022."},{"key":"e_1_3_2_1_13_1","volume-title":"Generating Wikipedia by Summarizing Long Sequences. arXiv preprint arXiv","author":"Liu Peter J","year":"1801","unstructured":"Peter J Liu et al. Generating Wikipedia by Summarizing Long Sequences. arXiv preprint arXiv. 1801.10198, 2018."},{"key":"e_1_3_2_1_14_1","volume-title":"The WikiText Long Term Dependency Language Modeling Dataset. https:\/\/blog.salesforceairesearch.com\/the-wikitext-long-term-dependency-language-modeling-dataset\/","author":"Merity Stephen","year":"2021","unstructured":"Stephen Merity. The WikiText Long Term Dependency Language Modeling Dataset. https:\/\/blog.salesforceairesearch.com\/the-wikitext-long-term-dependency-language-modeling-dataset\/, 2021. Accessed: 2021-11-08."},{"key":"e_1_3_2_1_15_1","volume-title":"VLSI-Circuits","author":"Moons Bert","year":"2016","unstructured":"Bert Moons and Marian Verhelst. A 0.3--2.6 TOPS\/W precision-scalable processor for real-time large-scale ConvNets. In VLSI-Circuits, 2016."},{"key":"e_1_3_2_1_16_1","volume-title":"OpenAIblog","author":"Alec","year":"2019","unstructured":"Alec Radford et al. Language Models are Unsupervised Multitask Learners. OpenAIblog, 2019."},{"key":"e_1_3_2_1_17_1","volume-title":"Compressive transformers for long-range sequence modelling. arXiv preprint arXiv.1911.05507","author":"Rae Jack W","year":"2019","unstructured":"Jack W Rae et al. Compressive transformers for long-range sequence modelling. arXiv preprint arXiv.1911.05507, 2019."},{"key":"e_1_3_2_1_18_1","volume-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","author":"Colin Raffel","year":"2019","unstructured":"Colin Raffel et al. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. 2019."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.vlsi.2017.02.002"},{"key":"e_1_3_2_1_20_1","volume-title":"EdgeBERT: Sentence-level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In MICRO","author":"Thierry","year":"2021","unstructured":"Thierry Tambe et al. EdgeBERT: Sentence-level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In MICRO, 2021."},{"key":"e_1_3_2_1_21_1","volume-title":"Long Range Arena: A Benchmark for Efficient Transformers. arXiv preprint arXiv.2011.04006","author":"Yi Tay","year":"2020","unstructured":"Yi Tay et al. Long Range Arena: A Benchmark for Efficient Transformers. arXiv preprint arXiv.2011.04006, 2020."},{"key":"e_1_3_2_1_22_1","volume-title":"NeurIPS","author":"Ashish","year":"2017","unstructured":"Ashish Vaswani et al. Attention is All You Need. In NeurIPS, 2017."},{"key":"e_1_3_2_1_23_1","volume-title":"GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. arXiv preprint arXiv:1804.07461","author":"Alex Wang","year":"2018","unstructured":"Alex Wang et al. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. arXiv preprint arXiv:1804.07461, 2018."},{"key":"e_1_3_2_1_24_1","volume-title":"SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning. In HPCA","author":"Hanrui","year":"2021","unstructured":"Hanrui Wang et al. SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning. In HPCA, 2021."},{"key":"e_1_3_2_1_25_1","volume-title":"HuggingFace's Transformers: State-of-the-Art Natural Language Processing. arXiv preprint arXiv","author":"Thomas Wolf","year":"1910","unstructured":"Thomas Wolf et al. HuggingFace's Transformers: State-of-the-Art Natural Language Processing. arXiv preprint arXiv. 1910.03771, 2019."},{"key":"e_1_3_2_1_26_1","volume-title":"MICRO","author":"Amir","year":"2022","unstructured":"Amir Yazdanbakhsh et al. Sparse attention acceleration with synergistic in-memory pruning and on-chip recomputation. In MICRO, 2022."}],"event":{"name":"ISLPED '24: 29th ACM\/IEEE International Symposium on Low Power Electronics and Design","location":"Newport Beach CA USA","acronym":"ISLPED '24","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CAS","IEEE EDA"]},"container-title":["Proceedings of the 29th ACM\/IEEE International Symposium on Low Power Electronics and Design"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3665314.3670798","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3665314.3670798","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:34Z","timestamp":1750294714000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3665314.3670798"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,5]]},"references-count":26,"alternative-id":["10.1145\/3665314.3670798","10.1145\/3665314"],"URL":"https:\/\/doi.org\/10.1145\/3665314.3670798","relation":{},"subject":[],"published":{"date-parts":[[2024,8,5]]},"assertion":[{"value":"2024-09-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}