{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:45:39Z","timestamp":1775745939887,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,18]]},"DOI":"10.1145\/3466752.3480125","type":"proceedings-article","created":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T19:12:05Z","timestamp":1634497925000},"page":"977-991","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":185,"title":["Sanger: A Co-Design Framework for Enabling Sparse Attention using Reconfigurable Architecture"],"prefix":"10.1145","author":[{"given":"Liqiang","family":"Lu","sequence":"first","affiliation":[{"name":"Peking University, China"}]},{"given":"Yicheng","family":"Jin","sequence":"additional","affiliation":[{"name":"Peking University"}]},{"given":"Hangrui","family":"Bi","sequence":"additional","affiliation":[{"name":"Peking University"}]},{"given":"Zizhang","family":"Luo","sequence":"additional","affiliation":[{"name":"Peking University"}]},{"given":"Peng","family":"Li","sequence":"additional","affiliation":[{"name":"Advanced Institute of Information Technology, Peking University"}]},{"given":"Tao","family":"Wang","sequence":"additional","affiliation":[{"name":"Peking University"}]},{"given":"Yun","family":"Liang","sequence":"additional","affiliation":[{"name":"Peking University, China"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2228360.2228584"},{"key":"e_1_3_2_1_2_1","volume-title":"Longformer: The long-document transformer. arXiv preprint arXiv:2004.05150(2020).","author":"Beltagy Iz","year":"2020","unstructured":"Iz Beltagy , Matthew\u00a0 E Peters , and Arman Cohan . 2020 . Longformer: The long-document transformer. arXiv preprint arXiv:2004.05150(2020). Iz Beltagy, Matthew\u00a0E Peters, and Arman Cohan. 2020. Longformer: The long-document transformer. arXiv preprint arXiv:2004.05150(2020)."},{"key":"e_1_3_2_1_3_1","unstructured":"Yoshua Bengio N. L\u00e9onard and Aaron\u00a0C. Courville. 2013. Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation. ArXiv abs\/1308.3432(2013).  Yoshua Bengio N. L\u00e9onard and Aaron\u00a0C. Courville. 2013. Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation. ArXiv abs\/1308.3432(2013)."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of KDD cup and workshop.","author":"Bennett James","year":"2007","unstructured":"James Bennett and Stan Lanning . 2007 . The netflix prize . In Proceedings of KDD cup and workshop. James Bennett and Stan Lanning. 2007. The netflix prize. In Proceedings of KDD cup and workshop."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Cao Shijie","unstructured":"Shijie Cao , Lingxiao Ma , W. Xiao , Chen Zhang , Yunxin Liu , L. Zhang , L. Nie , and Z. Yang . 2019. SeerNet: Predicting Convolutional Neural Network Feature-Map Sparsity Through Low-Bit Quantization . In Proceedings of Conference on Computer Vision and Pattern Recognition (CVPR). Shijie Cao, Lingxiao Ma, W. Xiao, Chen Zhang, Yunxin Liu, L. Zhang, L. Nie, and Z. Yang. 2019. SeerNet: Predicting Convolutional Neural Network Feature-Map Sparsity Through Low-Bit Quantization. In Proceedings of Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Yu-Hsin Chen Tien-Ju Yang Joel Emer and Vivienne Sze. 2018. Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices. arXiv preprint arXiv:1807.07928(2018).  Yu-Hsin Chen Tien-Ju Yang Joel Emer and Vivienne Sze. 2018. Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices. arXiv preprint arXiv:1807.07928(2018).","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_2_1_10_1","unstructured":"Sharan Chetlur C. Woolley Philippe Vandermersch J. Cohen John Tran Bryan Catanzaro and Evan Shelhamer. 2014. cuDNN: Efficient Primitives for Deep Learning. ArXiv abs\/1410.0759(2014).  Sharan Chetlur C. Woolley Philippe Vandermersch J. Cohen John Tran Bryan Catanzaro and Evan Shelhamer. 2014. cuDNN: Efficient Primitives for Deep Learning. ArXiv abs\/1410.0759(2014)."},{"key":"e_1_3_2_1_11_1","unstructured":"Rewon Child Scott Gray Alec Radford and Ilya Sutskever. 2019. Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509(2019).  Rewon Child Scott Gray Alec Radford and Ilya Sutskever. 2019. Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509(2019)."},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of Conference on Empirical Methods in Natural Language Processing\/International Joint Conference on Natural Language Processing.","author":"Correia M.","unstructured":"Gon\u00e7alo\u00a0 M. Correia , Vlad Niculae , and Andr\u00e9 F . \u00a0T. Martins. 2019. Adaptively Sparse Transformers . In Proceedings of Conference on Empirical Methods in Natural Language Processing\/International Joint Conference on Natural Language Processing. Gon\u00e7alo\u00a0M. Correia, Vlad Niculae, and Andr\u00e9 F.\u00a0T. Martins. 2019. Adaptively Sparse Transformers. In Proceedings of Conference on Empirical Methods in Natural Language Processing\/International Joint Conference on Natural Language Processing."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of Conference on Empirical Methods in Natural Language Processing\/International Joint Conference on Natural Language Processing.","author":"Cui Baiyun","unstructured":"Baiyun Cui , Y. Li , Ming Chen , and Z. Zhang . 2019. Fine-tune BERT with Sparse Self-Attention Mechanism . In Proceedings of Conference on Empirical Methods in Natural Language Processing\/International Joint Conference on Natural Language Processing. Baiyun Cui, Y. Li, Ming Chen, and Z. Zhang. 2019. Fine-tune BERT with Sparse Self-Attention Mechanism. In Proceedings of Conference on Empirical Methods in Natural Language Processing\/International Joint Conference on Natural Language Processing."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304041"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00024"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics \u2013 Human Language Technologies.","author":"Devlin J.","year":"2019","unstructured":"J. Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding . In Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics \u2013 Human Language Technologies. J. Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics \u2013 Human Language Technologies."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433722"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021745"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.30"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358283"},{"key":"e_1_3_2_1_23_1","unstructured":"Intel. 2021. Oneapi-Src\/oneDNN. https:\/\/github.com\/oneapi-src\/oneDNN.  Intel. 2021. Oneapi-Src\/oneDNN. https:\/\/github.com\/oneapi-src\/oneDNN."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358286"},{"key":"e_1_3_2_1_25_1","volume-title":"Reformer: The Efficient Transformer. ArXiv abs\/2001.04451(2020).","author":"Kitaev Nikita","year":"2020","unstructured":"Nikita Kitaev , Lukasz Kaiser , and Anselm Levskaya . 2020 . Reformer: The Efficient Transformer. ArXiv abs\/2001.04451(2020). Nikita Kitaev, Lukasz Kaiser, and Anselm Levskaya. 2020. Reformer: The Efficient Transformer. ArXiv abs\/2001.04451(2020)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304028"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2019.00-31"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173176"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358295"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3370748.3406567"},{"key":"e_1_3_2_1_32_1","volume-title":"An Efficient Hardware Design for Accelerating Sparse CNNs with NAS-based Models. Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD)","author":"Liang Yun","year":"2021","unstructured":"Yun Liang , Liqiang Lu , Yicheng Jin , Jiaming Xie , Ruirui Huang , Jiansong Zhang , and Wei Lin . 2021. An Efficient Hardware Design for Accelerating Sparse CNNs with NAS-based Models. Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD) ( 2021 ). Yun Liang, Liqiang Lu, Yicheng Jin, Jiaming Xie, Ruirui Huang, Jiansong Zhang, and Wei Lin. 2021. An Efficient Hardware Design for Accelerating Sparse CNNs with NAS-based Models. Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD) (2021)."},{"key":"e_1_3_2_1_33_1","volume-title":"OMNI: A framework for integrating hardware and software optimizations for sparse CNNs. Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD)","author":"Liang Yun","year":"2020","unstructured":"Yun Liang , Liqiang Lu , and Jiaming Xie . 2020 . OMNI: A framework for integrating hardware and software optimizations for sparse CNNs. Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD) (2020). Yun Liang, Liqiang Lu, and Jiaming Xie. 2020. OMNI: A framework for integrating hardware and software optimizations for sparse CNNs. Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD) (2020)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694358"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00062"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196120"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2019.00013"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2507157.2507163"},{"key":"e_1_3_2_1_39_1","unstructured":"NVIDIA. 2021. NVIDIA\/DeepLearningExamples. https:\/\/github.com\/NVIDIA\/DeepLearningExamples.  NVIDIA. 2021. NVIDIA\/DeepLearningExamples. https:\/\/github.com\/NVIDIA\/DeepLearningExamples."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00067"},{"key":"e_1_3_2_1_41_1","volume":"201","author":"Parashar Angshuman","unstructured":"Angshuman Parashar , Minsoo Rhu , Anurag Mukkara , Antonio Puglielli , Rangharajan Venkatesan , Brucek Khailany , Joel Emer , Stephen\u00a0 W Keckler , and William\u00a0 J Dally. 201 7. Scnn: An accelerator for compressed-sparse convolutional neural networks. In Proceedings of SIGARCH Computer Architecture News. Angshuman Parashar, Minsoo Rhu, Anurag Mukkara, Antonio Puglielli, Rangharajan Venkatesan, Brucek Khailany, Joel Emer, Stephen\u00a0W Keckler, and William\u00a0J Dally. 2017. Scnn: An accelerator for compressed-sparse convolutional neural networks. In Proceedings of SIGARCH Computer Architecture News.","journal-title":"J Dally."},{"key":"e_1_3_2_1_42_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke , S. Gross , Francisco Massa , A. Lerer , J. Bradbury , G. Chanan , Trevor Killeen , Z. Lin , N. Gimelshein , L. Antiga , Alban Desmaison , Andreas K\u00f6pf , Edward Yang , Zach DeVito , Martin Raison , Alykhan Tejani , Sasank Chilamkurthy , B. Steiner , Lu Fang , Junjie Bai , and Soumith Chintala . 2019. PyTorch: An Imperative Style , High-Performance Deep Learning Library . In NeurIPS. Adam Paszke, S. Gross, Francisco Massa, A. Lerer, J. Bradbury, G. Chanan, Trevor Killeen, Z. Lin, N. Gimelshein, L. Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, B. Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In NeurIPS."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485925"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"e_1_3_2_1_45_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog","author":"Radford Alec","year":"2019","unstructured":"Alec Radford , Jeffrey Wu , Rewon Child , David Luan , Dario Amodei , and Ilya Sutskever . 2019. Language models are unsupervised multitask learners. OpenAI blog ( 2019 ). Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. 2019. Language models are unsupervised multitask learners. OpenAI blog (2019)."},{"key":"e_1_3_2_1_46_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog","author":"Radford Alec","year":"2019","unstructured":"Alec Radford , Jeffrey Wu , Rewon Child , David Luan , Dario Amodei , Ilya Sutskever , 2019. Language models are unsupervised multitask learners. OpenAI blog ( 2019 ). Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, Ilya Sutskever, 2019. Language models are unsupervised multitask learners. OpenAI blog (2019)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of Transactions of the Association for Computational Linguistics (TACL).","author":"Roy Aurko","year":"2020","unstructured":"Aurko Roy , M. Saffar , Ashish Vaswani , and David Grangier . 2020 . Efficient Content-Based Sparse Attention with Routing Transformers . In Proceedings of Transactions of the Association for Computational Linguistics (TACL). Aurko Roy, M. Saffar, Ashish Vaswani, and David Grangier. 2020. Efficient Content-Based Sparse Attention with Routing Transformers. In Proceedings of Transactions of the Association for Computational Linguistics (TACL)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2833179.2833183"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00068"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00062"},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of International Conference on Machine Learning (ICML).","author":"Tay Yi","unstructured":"Yi Tay , Dara Bahri , L. Yang , Donald Metzler , and D. Juan . 2020. Sparse Sinkhorn Attention . In Proceedings of International Conference on Machine Learning (ICML). Yi Tay, Dara Bahri, L. Yang, Donald Metzler, and D. Juan. 2020. Sparse Sinkhorn Attention. In Proceedings of International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_53_1","unstructured":"Yi Tay M. Dehghani Samira Abnar Y. Shen Dara Bahri Philip Pham J. Rao Liu Yang Sebastian Ruder and Donald Metzler. 2020. Long Range Arena: A Benchmark for Efficient Transformers. ArXiv abs\/2011.04006(2020).  Yi Tay M. Dehghani Samira Abnar Y. Shen Dara Bahri Philip Pham J. Rao Liu Yang Sebastian Ruder and Donald Metzler. 2020. Long Range Arena: A Benchmark for Efficient Transformers. ArXiv abs\/2011.04006(2020)."},{"key":"e_1_3_2_1_54_1","unstructured":"Yi Tay Mostafa Dehghani Dara Bahri and Donald Metzler. 2020. Efficient transformers: A survey. arXiv preprint arXiv:2009.06732(2020).  Yi Tay Mostafa Dehghani Dara Bahri and Donald Metzler. 2020. Efficient transformers: A survey. arXiv preprint arXiv:2009.06732(2020)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/APCCAS.2018.8605654"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00086"},{"key":"e_1_3_2_1_62_1","volume-title":"Proceedings of Conference on Empirical Methods in Natural Language Processing.","author":"Xie Qizhe","unstructured":"Qizhe Xie , Guokun Lai , Zihang Dai , and E. Hovy . 2018. Large-scale Cloze Test Dataset Created by Teachers . In Proceedings of Conference on Empirical Methods in Natural Language Processing. Qizhe Xie, Guokun Lai, Zihang Dai, and E. Hovy. 2018. Large-scale Cloze Test Dataset Created by Teachers. In Proceedings of Conference on Empirical Methods in Natural Language Processing."},{"key":"e_1_3_2_1_63_1","volume-title":"Proceedings of International Conference on Machine Learning (ICML).","author":"Xu Kelvin","year":"2015","unstructured":"Kelvin Xu , Jimmy Ba , Ryan Kiros , Kyunghyun Cho , Aaron\u00a0 C. Courville , R. Salakhutdinov , R. Zemel , and Yoshua Bengio . 2015 . Show, Attend and Tell: Neural Image Caption Generation with Visual Attention . In Proceedings of International Conference on Machine Learning (ICML). Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron\u00a0C. Courville, R. Salakhutdinov, R. Zemel, and Yoshua Bengio. 2015. Show, Attend and Tell: Neural Image Caption Generation with Visual Attention. In Proceedings of International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322271"},{"key":"e_1_3_2_1_65_1","unstructured":"Manzil Zaheer Guru Guruganesh Avinava Dubey Joshua Ainslie Chris Alberti Santiago Ontanon Philip Pham Anirudh Ravula Qifan Wang Li Yang 2020. Big bird: Transformers for longer sequences. arXiv preprint arXiv:2007.14062(2020).  Manzil Zaheer Guru Guruganesh Avinava Dubey Joshua Ainslie Chris Alberti Santiago Ontanon Philip Pham Anirudh Ravula Qifan Wang Li Yang 2020. Big bird: Transformers for longer sequences. arXiv preprint arXiv:2007.14062(2020)."},{"key":"e_1_3_2_1_66_1","volume-title":"Proceedings of International Conference on Machine Learning (ICML).","author":"Zhang Han","year":"2019","unstructured":"Han Zhang , I. Goodfellow , Dimitris\u00a0 N. Metaxas , and Augustus Odena . 2019 . Self-Attention Generative Adversarial Networks . In Proceedings of International Conference on Machine Learning (ICML). Han Zhang, I. Goodfellow, Dimitris\u00a0N. Metaxas, and Augustus Odena. 2019. Self-Attention Generative Adversarial Networks. In Proceedings of International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"e_1_3_2_1_68_1","unstructured":"Guangxiang Zhao Junyang Lin Zhiyuan Zhang Xuancheng Ren Qi Su and X. Sun. 2019. Explicit Sparse Transformer: Concentrated Attention Through Explicit Selection. ArXiv abs\/1912.11637(2019).  Guangxiang Zhao Junyang Lin Zhiyuan Zhang Xuancheng Ren Qi Su and X. Sun. 2019. Explicit Sparse Transformer: Concentrated Attention Through Explicit Selection. ArXiv abs\/1912.11637(2019)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00011"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358269"}],"event":{"name":"MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Virtual Event Greece","acronym":"MICRO '21","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"]},"container-title":["MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3466752.3480125","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3466752.3480125","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:57Z","timestamp":1750191537000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3466752.3480125"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":70,"alternative-id":["10.1145\/3466752.3480125","10.1145\/3466752"],"URL":"https:\/\/doi.org\/10.1145\/3466752.3480125","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}