{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:52:06Z","timestamp":1775065926430,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,23]],"date-time":"2024-06-23T00:00:00Z","timestamp":1719100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62202288"],"award-info":[{"award-number":["62202288"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,23]]},"DOI":"10.1145\/3649329.3655982","type":"proceedings-article","created":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T19:27:22Z","timestamp":1731007642000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["TSAcc: An Efficient \\underline{T}empo-\\underline{S}patial Similarity Aware \\underline{Acc}elerator for Attention Acceleration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6494-4786","authenticated-orcid":false,"given":"Zhuoran","family":"Song","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9814-2468","authenticated-orcid":false,"given":"Chunyu","family":"Qi","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0852-3784","authenticated-orcid":false,"given":"Yuanzheng","family":"Yao","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0869-9782","authenticated-orcid":false,"given":"Peng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Alibaba Cloud, Shanghai, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6936-3908","authenticated-orcid":false,"given":"Yanyi","family":"Zi","sequence":"additional","affiliation":[{"name":"Alibaba Cloud, Shanghai, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8276-1868","authenticated-orcid":false,"given":"Nan","family":"Wang","sequence":"additional","affiliation":[{"name":"Alibaba Cloud, Shanghai, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2790-5884","authenticated-orcid":false,"given":"Xiaoyao","family":"Liang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/990308.990309"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3085572"},{"key":"e_1_3_2_1_3_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Jacob Devlin","year":"2018","unstructured":"Jacob Devlin et al. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_4_1","unstructured":"Tae Jun Ham et al. 2020. A^ 3: Accelerating attention mechanisms in neural networks with approximation. In HPCA. IEEE 328--341."},{"key":"e_1_3_2_1_5_1","unstructured":"Norman P Jouppi et al. 2017. In-datacenter performance analysis of a tensor processing unit. In ISCA. 1--12."},{"key":"e_1_3_2_1_6_1","volume-title":"Reformer: The efficient transformer. arXiv preprint arXiv:2001.04451","author":"Kitaev Nikita","year":"2020","unstructured":"Nikita Kitaev, \u0141ukasz Kaiser, and Anselm Levskaya. 2020. Reformer: The efficient transformer. arXiv preprint arXiv:2001.04451 (2020)."},{"key":"e_1_3_2_1_7_1","volume-title":"Sanger: A Co-Design Framework for Enabling Sparse Attention using Reconfigurable Architecture. In MICRO.","author":"Liqiang Lu","year":"2021","unstructured":"Liqiang Lu et al. 2021. Sanger: A Co-Design Framework for Enabling Sparse Attention using Reconfigurable Architecture. In MICRO."},{"key":"e_1_3_2_1_8_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems","author":"Adam Paszke","year":"2019","unstructured":"Adam Paszke et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems (2019)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Zheng Qu et al. 2022. DOTA: detect and omit weak attentions for scalable transformer acceleration. In ASPLOS. 14--26.","DOI":"10.1145\/3503222.3507738"},{"key":"e_1_3_2_1_10_1","unstructured":"Alec Radford et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00353"},{"key":"e_1_3_2_1_12_1","volume-title":"Edgebert: Sentence-level energy optimizations for latency-aware multi-task nlp inference. In MICRO-54. 830--844.","author":"Thierry Tambe","year":"2021","unstructured":"Thierry Tambe et al. 2021. Edgebert: Sentence-level energy optimizations for latency-aware multi-task nlp inference. In MICRO-54. 830--844."},{"key":"e_1_3_2_1_13_1","unstructured":"Yi Tay et al. 2020. Sparse sinkhorn attention. In ICML."},{"key":"e_1_3_2_1_14_1","unstructured":"Ashish Vaswani et al. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Elena Voita et al. 2019. Analyzing multi-head self-attention: Specialized heads do the heavy lifting the rest can be pruned. arXiv (2019).","DOI":"10.18653\/v1\/P19-1580"},{"key":"e_1_3_2_1_16_1","volume-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv","author":"Alex Wang","year":"2018","unstructured":"Alex Wang et al. 2018. GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv (2018)."},{"key":"e_1_3_2_1_17_1","volume-title":"[n. d.]. Spatten: Efficient sparse attention architecture with cascade token and head pruning","author":"Hanrui Wang","unstructured":"Hanrui Wang et al. [n. d.]. Spatten: Efficient sparse attention architecture with cascade token and head pruning. In HPCA. IEEE."},{"key":"e_1_3_2_1_18_1","volume-title":"[n. d.]. Gobo: Quantizing attention-based nlp models for low latency and energy efficient inference","author":"Zadeh Ali Hadi","unstructured":"Ali Hadi Zadeh et al. [n. d.]. Gobo: Quantizing attention-based nlp models for low latency and energy efficient inference. In MICRO. IEEE."}],"event":{"name":"DAC '24: 61st ACM\/IEEE Design Automation Conference","location":"San Francisco CA USA","acronym":"DAC '24","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE-CEDA","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 61st ACM\/IEEE Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3655982","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649329.3655982","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:55Z","timestamp":1750295875000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3655982"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,23]]},"references-count":18,"alternative-id":["10.1145\/3649329.3655982","10.1145\/3649329"],"URL":"https:\/\/doi.org\/10.1145\/3649329.3655982","relation":{},"subject":[],"published":{"date-parts":[[2024,6,23]]},"assertion":[{"value":"2024-11-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}