{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T16:21:34Z","timestamp":1781972494189,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":73,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,17]],"date-time":"2023-06-17T00:00:00Z","timestamp":1686960000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["Grant 62125403"],"award-info":[{"award-number":["Grant 62125403"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100009592","name":"Beijing Municipal Science and Technology Commission","doi-asserted-by":"publisher","award":["Grant Z221100007722023"],"award-info":[{"award-number":["Grant Z221100007722023"]}],"id":[{"id":"10.13039\/501100009592","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["Grant 2021ZD0114400"],"award-info":[{"award-number":["Grant 2021ZD0114400"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017582","name":"Beijing National Research Center For Information Science And Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100017582","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012237","name":"Beijing Innovation Center for Future Chip","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012237","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,17]]},"DOI":"10.1145\/3579371.3589057","type":"proceedings-article","created":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T20:25:28Z","timestamp":1686947128000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":105,"title":["FACT: FFN-Attention Co-optimized Transformer Architecture with Eager Correlation Prediction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5530-5416","authenticated-orcid":false,"given":"Yubin","family":"Qin","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8293-8881","authenticated-orcid":false,"given":"Yang","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4229-4985","authenticated-orcid":false,"given":"Dazheng","family":"Deng","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6074-7324","authenticated-orcid":false,"given":"Zhiren","family":"Zhao","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0105-8351","authenticated-orcid":false,"given":"Xiaolong","family":"Yang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7548-4116","authenticated-orcid":false,"given":"Leibo","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5117-7920","authenticated-orcid":false,"given":"Shaojun","family":"Wei","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6942-4395","authenticated-orcid":false,"given":"Yang","family":"Hu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2309-572X","authenticated-orcid":false,"given":"Shouyi","family":"Yin","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,6,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022","author":"Andri Renzo","year":"2022","unstructured":"Renzo Andri , Beatrice Bussolino , Antonio Cipolletta , Lukas Cavigelli , and Zhe Wang . 2022 . Going Further With Winograd Convolutions: Tap-Wise Quantization for Efficient Inference on 4x4 Tiles . In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022 , Chicago, IL, USA, October 1--5 , 2022. IEEE, 582--598. Renzo Andri, Beatrice Bussolino, Antonio Cipolletta, Lukas Cavigelli, and Zhe Wang. 2022. Going Further With Winograd Convolutions: Tap-Wise Quantization for Efficient Inference on 4x4 Tiles. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022, Chicago, IL, USA, October 1--5, 2022. IEEE, 582--598."},{"key":"e_1_3_2_1_2_1","volume-title":"ALRESCHA: A Lightweight Reconfigurable Sparse-Computation Accelerator. In IEEE International Symposium on High Performance Computer Architecture, HPCA 2020","author":"Asgari Bahar","year":"2020","unstructured":"Bahar Asgari , Ramyad Hadidi , Tushar Krishna , Hyesoon Kim , and Sudhakar Yalamanchili . 2020 . ALRESCHA: A Lightweight Reconfigurable Sparse-Computation Accelerator. In IEEE International Symposium on High Performance Computer Architecture, HPCA 2020 , San Diego, CA, USA, February 22--26 , 2020. IEEE, 249--260. Bahar Asgari, Ramyad Hadidi, Tushar Krishna, Hyesoon Kim, and Sudhakar Yalamanchili. 2020. ALRESCHA: A Lightweight Reconfigurable Sparse-Computation Accelerator. In IEEE International Symposium on High Performance Computer Architecture, HPCA 2020, San Diego, CA, USA, February 22--26, 2020. IEEE, 249--260."},{"key":"e_1_3_2_1_3_1","volume-title":"Longformer: The Long-Document Transformer. CoRR abs\/2004.05150","author":"Beltagy Iz","year":"2020","unstructured":"Iz Beltagy , Matthew E. Peters , and Arman Cohan . 2020 . Longformer: The Long-Document Transformer. CoRR abs\/2004.05150 (2020). arXiv:2004.05150 Iz Beltagy, Matthew E. Peters, and Arman Cohan. 2020. Longformer: The Long-Document Transformer. CoRR abs\/2004.05150 (2020). arXiv:2004.05150"},{"key":"e_1_3_2_1_4_1","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Brown Tom B.","year":"2020","unstructured":"Tom B. Brown , Benjamin Mann , Nick Ryder , Melanie Subbiah , Jared Kaplan , Prafulla Dhariwal , Arvind Neelakantan , Pranav Shyam , Girish Sastry , Amanda Askell , Sandhini Agarwal , Ariel Herbert-Voss , Gretchen Krueger , Tom Henighan , Rewon Child , Aditya Ramesh , Daniel M. Ziegler , Jeffrey Wu , Clemens Winter , Christopher Hesse , Mark Chen , Eric Sigler , Mateusz Litwin , Scott Gray , Benjamin Chess , Jack Clark , Christopher Berner , Sam McCandlish , Alec Radford , Ilya Sutskever , and Dario Amodei . 2020 . Language Models are Few-Shot Learners . In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020 , NeurIPS 2020, December 6--12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.). Tom B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel M. Ziegler, Jeffrey Wu, Clemens Winter, Christopher Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6--12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.)."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings, Part I (Lecture Notes in Computer Science","volume":"229","author":"Carion Nicolas","year":"2020","unstructured":"Nicolas Carion , Francisco Massa , Gabriel Synnaeve , Nicolas Usunier , Alexander Kirillov , and Sergey Zagoruyko . 2020 . End-to-End Object Detection with Transformers. In Computer Vision - ECCV 2020 - 16th European Conference, Glasgow, UK, August 23--28, 2020 , Proceedings, Part I (Lecture Notes in Computer Science , Vol. 12346), Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm (Eds.). Springer, 213-- 229 . Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. 2020. End-to-End Object Detection with Transformers. In Computer Vision - ECCV 2020 - 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part I (Lecture Notes in Computer Science, Vol. 12346), Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm (Eds.). Springer, 213--229."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474255"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2302.05442"},{"key":"e_1_3_2_1_8_1","volume-title":"GoSPA: An Energy-efficient High-performance Globally Optimized SParse Convolutional Neural Network Accelerator. In 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021","author":"Deng Chunhua","year":"2021","unstructured":"Chunhua Deng , Yang Sui , Siyu Liao , Xuehai Qian , and Bo Yuan . 2021 . GoSPA: An Energy-efficient High-performance Globally Optimized SParse Convolutional Neural Network Accelerator. In 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021 , Valencia, Spain, June 14--18 , 2021. IEEE, 1110--1123. Chunhua Deng, Yang Sui, Siyu Liao, Xuehai Qian, and Bo Yuan. 2021. GoSPA: An Energy-efficient High-performance Globally Optimized SParse Convolutional Neural Network Accelerator. In 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021, Valencia, Spain, June 14--18, 2021. IEEE, 1110--1123."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n19-1423"},{"key":"e_1_3_2_1_11_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy , Lucas Beyer , Alexander Kolesnikov , Dirk Weissenborn , Xiaohua Zhai , Thomas Unterthiner , Mostafa Dehghani , Matthias Minderer , Georg Heigold , Sylvain Gelly , Jakob Uszkoreit , and Neil Houlsby . 2021 . An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale . In 9th International Conference on Learning Representations, ICLR 2021 , Virtual Event, Austria, May 3--7 , 2021. OpenReview.net. Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net."},{"key":"e_1_3_2_1_12_1","volume-title":"55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022","author":"Fan Hongxiang","year":"2022","unstructured":"Hongxiang Fan , Thomas Chau , Stylianos I. Venieris , Royson Lee , Alexandros Kouris , Wayne Luk , Nicholas D. Lane , and Mohamed S. Abdelfattah . 2022. Adaptable Butterfly Accelerator for Attention-based NNs via Hardware and Algorithm Co-design . In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022 , Chicago, IL, USA, October 1--5 , 2022 . IEEE, 599--615. Hongxiang Fan, Thomas Chau, Stylianos I. Venieris, Royson Lee, Alexandros Kouris, Wayne Luk, Nicholas D. Lane, and Mohamed S. Abdelfattah. 2022. Adaptable Butterfly Accelerator for Attention-based NNs via Hardware and Algorithm Co-design. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022, Chicago, IL, USA, October 1--5, 2022. IEEE, 599--615."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2022.3197282"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019","author":"Gondimalla Ashish","year":"2019","unstructured":"Ashish Gondimalla , Noah Chesnut , Mithuna Thottethodi , and T. N. Vijaykumar . 2019. SparTen: A Sparse Tensor Accelerator for Convolutional Neural Networks . In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019 , Columbus, OH, USA, October 12--16 , 2019 . ACM, 151--165. Ashish Gondimalla, Noah Chesnut, Mithuna Thottethodi, and T. N. Vijaykumar. 2019. SparTen: A Sparse Tensor Accelerator for Convolutional Neural Networks. In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019, Columbus, OH, USA, October 12--16, 2019. ACM, 151--165."},{"key":"e_1_3_2_1_15_1","volume-title":"CANDLES: Channel-Aware Novel Dataflow-Microarchitecture Co-Design for Low Energy Sparse Neural Network Acceleration","author":"Gudaparthi Sumanth","year":"2022","unstructured":"Sumanth Gudaparthi , Sarabjeet Singh , Surya Narayanan , Rajeev Balasubramonian , and Visvesh Sathe . 2022 . CANDLES: Channel-Aware Novel Dataflow-Microarchitecture Co-Design for Low Energy Sparse Neural Network Acceleration . In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022, Seoul, South Korea, April 2--6, 2022. IEEE , 876--891. Sumanth Gudaparthi, Sarabjeet Singh, Surya Narayanan, Rajeev Balasubramonian, and Visvesh Sathe. 2022. CANDLES: Channel-Aware Novel Dataflow-Microarchitecture Co-Design for Low Energy Sparse Neural Network Acceleration. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022, Seoul, South Korea, April 2--6, 2022. IEEE, 876--891."},{"key":"e_1_3_2_1_16_1","volume-title":"Findings of the Association for Computational Linguistics: NAACL 2022","author":"Guo Mandy","year":"2022","unstructured":"Mandy Guo , Joshua Ainslie , David C. Uthus , Santiago Onta\u00f1\u00f3n , Jianmo Ni , Yun-Hsuan Sung , and Yinfei Yang . 2022 . LongT5: Efficient Text-To-Text Transformer for Long Sequences . In Findings of the Association for Computational Linguistics: NAACL 2022 , Seattle, WA, United States, July 10--15 , 2022, Marine Carpuat, Marie-Catherine de Marneffe, and Iv\u00e1n Vladimir Meza Ru\u00edz (Eds.). Association for Computational Linguistics, 724--736. Mandy Guo, Joshua Ainslie, David C. Uthus, Santiago Onta\u00f1\u00f3n, Jianmo Ni, Yun-Hsuan Sung, and Yinfei Yang. 2022. LongT5: Efficient Text-To-Text Transformer for Long Sequences. In Findings of the Association for Computational Linguistics: NAACL 2022, Seattle, WA, United States, July 10--15, 2022, Marine Carpuat, Marie-Catherine de Marneffe, and Iv\u00e1n Vladimir Meza Ru\u00edz (Eds.). Association for Computational Linguistics, 724--736."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the 48th Annual International Symposium on Computer Architecture","author":"Ham Tae Jun","unstructured":"Tae Jun Ham , Yejin Lee , Seong Hoon Seo , Soosung Kim , Hyunji Choi , Sung Jun Jung , and Jae W. Lee . 2021. ELSA: Hardware-Software Co-Design for Efficient, Lightweight Self-Attention Mechanism in Neural Networks . In Proceedings of the 48th Annual International Symposium on Computer Architecture ( Virtual Event, Spain) (ISCA '21). IEEE Press, 692--705. Tae Jun Ham, Yejin Lee, Seong Hoon Seo, Soosung Kim, Hyunji Choi, Sung Jun Jung, and Jae W. Lee. 2021. ELSA: Hardware-Software Co-Design for Efficient, Lightweight Self-Attention Mechanism in Neural Networks. In Proceedings of the 48th Annual International Symposium on Computer Architecture (Virtual Event, Spain) (ISCA '21). IEEE Press, 692--705."},{"key":"e_1_3_2_1_19_1","volume-title":"ISCA '22: The 49th Annual International Symposium on Computer Architecture","author":"Hanson Edward","year":"2022","unstructured":"Edward Hanson , Shiyu Li , Hai Helen Li , and Yiran Chen . 2022 . Cascading structured pruning: enabling high data reuse for sparse DNN accelerators . In ISCA '22: The 49th Annual International Symposium on Computer Architecture , New York, New York, USA, June 18 -- 22 , 2022, Valentina Salapura, Mohamed Zahran, Fred Chong, and Lingjia Tang (Eds.). ACM, 522--535. Edward Hanson, Shiyu Li, Hai Helen Li, and Yiran Chen. 2022. Cascading structured pruning: enabling high data reuse for sparse DNN accelerators. In ISCA '22: The 49th Annual International Symposium on Computer Architecture, New York, New York, USA, June 18 -- 22, 2022, Valentina Salapura, Mohamed Zahran, Fred Chong, and Lingjia Tang (Eds.). ACM, 522--535."},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019","author":"Hegde Kartik","year":"2019","unstructured":"Kartik Hegde , Hadi Asghari Moghaddam , Michael Pellauer , Neal Clayton Crago , Aamer Jaleel , Edgar Solomonik , Joel S. Emer , and Christopher W. Fletcher . 2019. ExTensor: An Accelerator for Sparse Tensor Algebra . In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019 , Columbus, OH, USA, October 12--16 , 2019 . ACM, 319--333. Kartik Hegde, Hadi Asghari Moghaddam, Michael Pellauer, Neal Clayton Crago, Aamer Jaleel, Edgar Solomonik, Joel S. Emer, and Christopher W. Fletcher. 2019. ExTensor: An Accelerator for Sparse Tensor Algebra. In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019, Columbus, OH, USA, October 12--16, 2019. ACM, 319--333."},{"key":"e_1_3_2_1_21_1","volume-title":"SPAGHETTI: Streaming Accelerators for Highly Sparse GEMM on FPGAs. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021","author":"Hojabr Reza","year":"2021","unstructured":"Reza Hojabr , Ali Sedaghati , Amirali Sharifian , Ahmad Khonsari , and Arrvindh Shriraman . 2021 . SPAGHETTI: Streaming Accelerators for Highly Sparse GEMM on FPGAs. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021 , Seoul, South Korea, February 27 - March 3, 2021. IEEE, 84--96. Reza Hojabr, Ali Sedaghati, Amirali Sharifian, Ahmad Khonsari, and Arrvindh Shriraman. 2021. SPAGHETTI: Streaming Accelerators for Highly Sparse GEMM on FPGAs. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021, Seoul, South Korea, February 27 - March 3, 2021. IEEE, 84--96."},{"key":"e_1_3_2_1_22_1","volume-title":"DFX: A Low-latency Multi-FPGA Appliance for Accelerating Transformer-based Text Generation. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022","author":"Hong Seongmin","year":"2022","unstructured":"Seongmin Hong , Seungjae Moon , Junsoo Kim , Sungjae Lee , Minsub Kim , Dongsoo Lee , and Joo-Young Kim . 2022 . DFX: A Low-latency Multi-FPGA Appliance for Accelerating Transformer-based Text Generation. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022 , Chicago, IL, USA, October 1--5 , 2022. IEEE, 616--630. Seongmin Hong, Seungjae Moon, Junsoo Kim, Sungjae Lee, Minsub Kim, Dongsoo Lee, and Joo-Young Kim. 2022. DFX: A Low-latency Multi-FPGA Appliance for Accelerating Transformer-based Text Generation. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022, Chicago, IL, USA, October 1--5, 2022. IEEE, 616--630."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358286"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2021.3066572"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSSC.2020.3041497"},{"key":"e_1_3_2_1_26_1","volume-title":"Large Scale Learning of General Visual Representations for Transfer. CoRR abs\/1912.11370","author":"Kolesnikov Alexander","year":"2019","unstructured":"Alexander Kolesnikov , Lucas Beyer , Xiaohua Zhai , Joan Puigcerver , Jessica Yung , Sylvain Gelly , and Neil Houlsby . 2019. Large Scale Learning of General Visual Representations for Transfer. CoRR abs\/1912.11370 ( 2019 ). arXiv:1912.11370 Alexander Kolesnikov, Lucas Beyer, Xiaohua Zhai, Joan Puigcerver, Jessica Yung, Sylvain Gelly, and Neil Houlsby. 2019. Large Scale Learning of General Visual Representations for Transfer. CoRR abs\/1912.11370 (2019). arXiv:1912.11370"},{"key":"e_1_3_2_1_27_1","volume-title":"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In 8th International Conference on Learning Representations, ICLR 2020","author":"Lan Zhenzhong","year":"2020","unstructured":"Zhenzhong Lan , Mingda Chen , Sebastian Goodman , Kevin Gimpel , Piyush Sharma , and Radu Soricut . 2020 . ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In 8th International Conference on Learning Representations, ICLR 2020 , Addis Ababa, Ethiopia, April 26--30 , 2020. OpenReview.net. Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2020. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26--30, 2020. OpenReview.net."},{"key":"e_1_3_2_1_28_1","volume-title":"ISCA '22: The 49th Annual International Symposium on Computer Architecture","author":"Lew Jonathan S.","year":"2022","unstructured":"Jonathan S. Lew , Yunpeng Liu , Wenyi Gong , Negar Goli , R. David Evans , and Tor M. Aamodt . 2022. Anticipating and eliminating redundant computations in accelerated sparse training . In ISCA '22: The 49th Annual International Symposium on Computer Architecture , New York, New York, USA, June 18 -- 22 , 2022 , Valentina Salapura, Mohamed Zahran, Fred Chong, and Lingjia Tang (Eds.). ACM, 536--551. Jonathan S. Lew, Yunpeng Liu, Wenyi Gong, Negar Goli, R. David Evans, and Tor M. Aamodt. 2022. Anticipating and eliminating redundant computations in accelerated sparse training. In ISCA '22: The 49th Annual International Symposium on Computer Architecture, New York, New York, USA, June 18 -- 22, 2022, Valentina Salapura, Mohamed Zahran, Fred Chong, and Lingjia Tang (Eds.). ACM, 536--551."},{"key":"e_1_3_2_1_29_1","volume-title":"Ristretto: An Atomized Processing Architecture for Sparsity-Condensed Stream Flow in CNN. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022","author":"Li Gang","year":"2022","unstructured":"Gang Li , Weixiang Xu , Zhuoran Song , Naifeng Jing , Jian Cheng , and Xiaoyao Liang . 2022 . Ristretto: An Atomized Processing Architecture for Sparsity-Condensed Stream Flow in CNN. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022 , Chicago, IL, USA, October 1--5 , 2022. IEEE, 1434--1450. Gang Li, Weixiang Xu, Zhuoran Song, Naifeng Jing, Jian Cheng, and Xiaoyao Liang. 2022. Ristretto: An Atomized Processing Architecture for Sparsity-Condensed Stream Flow in CNN. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022, Chicago, IL, USA, October 1--5, 2022. IEEE, 1434--1450."},{"key":"e_1_3_2_1_30_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning, ICML 2022","volume":"12900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li , Dongxu Li , Caiming Xiong , and Steven C. H. Hoi . 2022 . BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning, ICML 2022 , 17--23 July 2022 , Baltimore, Maryland, USA (Proceedings of Machine Learning Research , Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesv\u00e1ri, Gang Niu, and Sivan Sabato (Eds.). PMLR, 12888-- 12900 . Junnan Li, Dongxu Li, Caiming Xiong, and Steven C. H. Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning, ICML 2022, 17--23 July 2022, Baltimore, Maryland, USA (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesv\u00e1ri, Gang Niu, and Sivan Sabato (Eds.). PMLR, 12888--12900."},{"key":"e_1_3_2_1_31_1","volume-title":"ESCALATE: Boosting the Efficiency of Sparse CNN Accelerator with Kernel Decomposition. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Li Shiyu","year":"2021","unstructured":"Shiyu Li , Edward Hanson , Xuehai Qian , Hai (Helen) Li , and Yiran Chen . 2021 . ESCALATE: Boosting the Efficiency of Sparse CNN Accelerator with Kernel Decomposition. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture , Virtual Event, Greece, October 18--22 , 2021. ACM, 992--1004. Shiyu Li, Edward Hanson, Xuehai Qian, Hai (Helen) Li, and Yiran Chen. 2021. ESCALATE: Boosting the Efficiency of Sparse CNN Accelerator with Kernel Decomposition. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture, Virtual Event, Greece, October 18--22, 2021. ACM, 992--1004."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527423"},{"key":"e_1_3_2_1_33_1","volume-title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs\/1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu , Myle Ott , Naman Goyal , Jingfei Du , Mandar Joshi , Danqi Chen , Omer Levy , Mike Lewis , Luke Zettlemoyer , and Veselin Stoyanov . 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs\/1907.11692 ( 2019 ). arXiv:1907.11692 Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs\/1907.11692 (2019). arXiv:1907.11692"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00049"},{"key":"e_1_3_2_1_37_1","volume-title":"Sanger: A Co-Design Framework for Enabling Sparse Attention Using Reconfigurable Architecture. In MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Lu Liqiang","year":"2021","unstructured":"Liqiang Lu , Yicheng Jin , Hangrui Bi , Zizhang Luo , Peng Li , Tao Wang , and Yun Liang . 2021 . Sanger: A Co-Design Framework for Enabling Sparse Attention Using Reconfigurable Architecture. In MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture ( Virtual Event, Greece) (MICRO '21). Association for Computing Machinery, New York, NY, USA, 977--991. Liqiang Lu, Yicheng Jin, Hangrui Bi, Zizhang Luo, Peng Li, Tao Wang, and Yun Liang. 2021. Sanger: A Co-Design Framework for Enabling Sparse Attention Using Reconfigurable Architecture. In MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture (Virtual Event, Greece) (MICRO '21). Association for Computing Machinery, New York, NY, USA, 977--991."},{"key":"e_1_3_2_1_38_1","volume-title":"53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020","author":"Mahmoud Mostafa","year":"2020","unstructured":"Mostafa Mahmoud , Isak Edo , Ali Hadi Zadeh , Omar Mohamed Awad , Gennady Pekhimenko , Jorge Albericio , and Andreas Moshovos . 2020 . TensorDash: Exploiting Sparsity to Accelerate Deep Neural Network Training . In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020 , Athens, Greece, October 17--21 , 2020. IEEE, 781--795. Mostafa Mahmoud, Isak Edo, Ali Hadi Zadeh, Omar Mohamed Awad, Gennady Pekhimenko, Jorge Albericio, and Andreas Moshovos. 2020. TensorDash: Exploiting Sparsity to Accelerate Deep Neural Network Training. In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020, Athens, Greece, October 17--21, 2020. IEEE, 781--795."},{"key":"e_1_3_2_1_39_1","volume-title":"Pointer Sentinel Mixture Models. In 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24--26, 2017, Conference Track Proceedings. OpenReview.net.","author":"Merity Stephen","year":"2017","unstructured":"Stephen Merity , Caiming Xiong , James Bradbury , and Richard Socher . 2017 . Pointer Sentinel Mixture Models. In 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24--26, 2017, Conference Track Proceedings. OpenReview.net. Stephen Merity, Caiming Xiong, James Bradbury, and Richard Socher. 2017. Pointer Sentinel Mixture Models. In 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24--26, 2017, Conference Track Proceedings. OpenReview.net."},{"key":"e_1_3_2_1_40_1","unstructured":"Adam Paszke Sam Gross Soumith Chintala Gregory Chanan Edward Yang Zachary DeVito Zeming Lin Alban Desmaison Luca Antiga and Adam Lerer. 2017. Automatic differentiation in PyTorch. (2017).  Adam Paszke Sam Gross Soumith Chintala Gregory Chanan Edward Yang Zachary DeVito Zeming Lin Alban Desmaison Luca Antiga and Adam Lerer. 2017. Automatic differentiation in PyTorch. (2017)."},{"key":"e_1_3_2_1_41_1","volume-title":"IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021","author":"Pavon Julian","year":"2021","unstructured":"Julian Pavon , Iv\u00e1n Vargas Valdivieso , Adri\u00e1n Barredo , Joan Marimon , Miquel Moret\u00f3 , Francesc Moll , Osman S. Unsal , Mateo Valero , and Adri\u00e1n Cristal . 2021 . VIA: A Smart Scratchpad for Vector Units with Application to Sparse Matrix Computations . In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021 , Seoul, South Korea, February 27 - March 3, 2021. IEEE, 921--934. Julian Pavon, Iv\u00e1n Vargas Valdivieso, Adri\u00e1n Barredo, Joan Marimon, Miquel Moret\u00f3, Francesc Moll, Osman S. Unsal, Mateo Valero, and Adri\u00e1n Cristal. 2021. VIA: A Smart Scratchpad for Vector Units with Application to Sparse Matrix Computations. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021, Seoul, South Korea, February 27 - March 3, 2021. IEEE, 921--934."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507738"},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24","volume":"8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford , Jong Wook Kim , Chris Hallacy , Aditya Ramesh , Gabriel Goh , Sandhini Agarwal , Girish Sastry , Amanda Askell , Pamela Mishkin , Jack Clark , Gretchen Krueger , and Ilya Sutskever . 2021 . Learning Transferable Visual Models From Natural Language Supervision . In Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24 July 2021, Virtual Event (Proceedings of Machine Learning Research , Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 8748-- 8763 . Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24 July 2021, Virtual Event (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 8748--8763."},{"key":"e_1_3_2_1_44_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans and Ilya Sutskever. 2019. Improving Language Understanding by Generative Pre-Training. (2019).  Alec Radford Karthik Narasimhan Tim Salimans and Ilya Sutskever. 2019. Improving Language Understanding by Generative Pre-Training. (2019)."},{"key":"e_1_3_2_1_45_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019).  Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019)."},{"key":"e_1_3_2_1_46_1","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel , Noam Shazeer , Adam Roberts , Katherine Lee , Sharan Narang , Michael Matena , Yanqi Zhou , Wei Li , and Peter J. Liu . 2020 . Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer . Journal of Machine Learning Research 21 , 140 (2020), 1 -- 67 . Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21, 140 (2020), 1--67.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"e_1_3_2_1_48_1","volume-title":"High-Resolution Image Synthesis with Latent Diffusion Models. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022","author":"Rombach Robin","year":"2022","unstructured":"Robin Rombach , Andreas Blattmann , Dominik Lorenz , Patrick Esser , and Bj\u00f6rn Ommer . 2022 . High-Resolution Image Synthesis with Latent Diffusion Models. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022 , New Orleans, LA, USA, June 18--24 , 2022. IEEE, 10674--10685. Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18--24, 2022. IEEE, 10674--10685."},{"key":"e_1_3_2_1_49_1","volume-title":"Capstan: A Vector RDA for Sparsity. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Rucker Alexander","year":"2021","unstructured":"Alexander Rucker , Matthew Vilim , Tian Zhao , Yaqi Zhang , Raghu Prabhakar , and Kunle Olukotun . 2021 . Capstan: A Vector RDA for Sparsity. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture , Virtual Event, Greece, October 18--22 , 2021. ACM, 1022--1035. Alexander Rucker, Matthew Vilim, Tian Zhao, Yaqi Zhang, Raghu Prabhakar, and Kunle Olukotun. 2021. Capstan: A Vector RDA for Sparsity. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture, Virtual Event, Greece, October 18--22, 2021. ACM, 1022--1035."},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019","author":"Sadi Fazle","year":"2019","unstructured":"Fazle Sadi , Joe Sweeney , Tze Meng Low , James C. Hoe , Larry T. Pileggi , and Franz Franchetti . 2019 . Efficient SpMV Operation for Large and Highly Sparse Matrices using Scalable Multi-way Merge Parallelization . In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019 , Columbus, OH, USA, October 12--16 , 2019. ACM, 347--358. Fazle Sadi, Joe Sweeney, Tze Meng Low, James C. Hoe, Larry T. Pileggi, and Franz Franchetti. 2019. Efficient SpMV Operation for Large and Highly Sparse Matrices using Scalable Multi-way Merge Parallelization. In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019, Columbus, OH, USA, October 12--16, 2019. ACM, 347--358."},{"key":"e_1_3_2_1_51_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs\/1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh , Lysandre Debut , Julien Chaumond , and Thomas Wolf . 2019. DistilBERT , a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs\/1910.01108 ( 2019 ). arXiv:1910.01108 Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs\/1910.01108 (2019). arXiv:1910.01108"},{"key":"e_1_3_2_1_52_1","volume-title":"Griffin: Rethinking Sparse Optimization for Deep Learning Architectures. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022","author":"Shin Jong Hoon","year":"2022","unstructured":"Jong Hoon Shin , Ali Shafiee , Ardavan Pedram , Hamzah Abdel-Aziz , Ling Li , and Joseph Hassoun . 2022 . Griffin: Rethinking Sparse Optimization for Deep Learning Architectures. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022 , Seoul, South Korea, April 2--6 , 2022. IEEE, 861--875. Jong Hoon Shin, Ali Shafiee, Ardavan Pedram, Hamzah Abdel-Aziz, Ling Li, and Joseph Hassoun. 2022. Griffin: Rethinking Sparse Optimization for Deep Learning Architectures. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2022, Seoul, South Korea, April 2--6, 2022. IEEE, 861--875."},{"key":"e_1_3_2_1_53_1","volume-title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. CoRR abs\/1909.08053","author":"Shoeybi Mohammad","year":"2019","unstructured":"Mohammad Shoeybi , Mostofa Patwary , Raul Puri , Patrick LeGresley , Jared Casper , and Bryan Catanzaro . 2019. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. CoRR abs\/1909.08053 ( 2019 ). arXiv:1909.08053 Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper, and Bryan Catanzaro. 2019. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. CoRR abs\/1909.08053 (2019). arXiv:1909.08053"},{"key":"e_1_3_2_1_54_1","volume-title":"Julie Bernauer, Xia Song, Mohammad Shoeybi, Yuxiong He, Michael Houston, Saurabh Tiwary, and Bryan Catanzaro.","author":"Smith Shaden","year":"2022","unstructured":"Shaden Smith , Mostofa Patwary , Brandon Norick , Patrick LeGresley , Samyam Rajbhandari , Jared Casper , Zhun Liu , Shrimai Prabhumoye , George Zerveas , Vijay Korthikanti , Elton Zheng , Rewon Child , Reza Yazdani Aminabadi , Julie Bernauer, Xia Song, Mohammad Shoeybi, Yuxiong He, Michael Houston, Saurabh Tiwary, and Bryan Catanzaro. 2022 . Using DeepSpeed and Megatron to Train Megatron-Turing NLG 530B, A Large-Scale Generative Language Model. CoRR abs\/2201.11990 (2022). arXiv:2201.11990 Shaden Smith, Mostofa Patwary, Brandon Norick, Patrick LeGresley, Samyam Rajbhandari, Jared Casper, Zhun Liu, Shrimai Prabhumoye, George Zerveas, Vijay Korthikanti, Elton Zheng, Rewon Child, Reza Yazdani Aminabadi, Julie Bernauer, Xia Song, Mohammad Shoeybi, Yuxiong He, Michael Houston, Saurabh Tiwary, and Bryan Catanzaro. 2022. Using DeepSpeed and Megatron to Train Megatron-Turing NLG 530B, A Large-Scale Generative Language Model. CoRR abs\/2201.11990 (2022). arXiv:2201.11990"},{"key":"e_1_3_2_1_55_1","volume-title":"DRQ: Dynamic Region-based Quantization for Deep Neural Network Acceleration. In 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020","author":"Song Zhuoran","year":"2020","unstructured":"Zhuoran Song , Bangqi Fu , Feiyang Wu , Zhaoming Jiang , Li Jiang , Naifeng Jing , and Xiaoyao Liang . 2020 . DRQ: Dynamic Region-based Quantization for Deep Neural Network Acceleration. In 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020 , Valencia, Spain, May 30 - June 3, 2020. IEEE, 1010--1021. Zhuoran Song, Bangqi Fu, Feiyang Wu, Zhaoming Jiang, Li Jiang, Naifeng Jing, and Xiaoyao Liang. 2020. DRQ: Dynamic Region-based Quantization for Deep Neural Network Acceleration. In 47th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2020, Valencia, Spain, May 30 - June 3, 2020. IEEE, 1010--1021."},{"key":"e_1_3_2_1_56_1","volume-title":"MatRaptor: A Sparse-Sparse Matrix Multiplication Accelerator Based on Row-Wise Product. In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020","author":"Srivastava Nitish Kumar","year":"2020","unstructured":"Nitish Kumar Srivastava , Hanchen Jin , Jie Liu , David H. Albonesi , and Zhiru Zhang . 2020 . MatRaptor: A Sparse-Sparse Matrix Multiplication Accelerator Based on Row-Wise Product. In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020 , Athens, Greece, October 17--21 , 2020. IEEE, 766--780. Nitish Kumar Srivastava, Hanchen Jin, Jie Liu, David H. Albonesi, and Zhiru Zhang. 2020. MatRaptor: A Sparse-Sparse Matrix Multiplication Accelerator Based on Row-Wise Product. In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020, Athens, Greece, October 17--21, 2020. IEEE, 766--780."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357895"},{"key":"e_1_3_2_1_58_1","volume-title":"EdgeBERT: Sentence-Level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Tambe Thierry","year":"2021","unstructured":"Thierry Tambe , Coleman Hooper , Lillian Pentecost , Tianyu Jia , En-Yu Yang , Marco Donato , Victor Sanh , Paul N. Whatmough , Alexander M. Rush , David Brooks , and Gu-Yeon Wei . 2021 . EdgeBERT: Sentence-Level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture , Virtual Event, Greece, October 18--22 , 2021. ACM, 830--844. Thierry Tambe, Coleman Hooper, Lillian Pentecost, Tianyu Jia, En-Yu Yang, Marco Donato, Victor Sanh, Paul N. Whatmough, Alexander M. Rush, David Brooks, and Gu-Yeon Wei. 2021. EdgeBERT: Sentence-Level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture, Virtual Event, Greece, October 18--22, 2021. ACM, 830--844."},{"key":"e_1_3_2_1_59_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Tay Yi","year":"2021","unstructured":"Yi Tay , Mostafa Dehghani , Samira Abnar , Yikang Shen , Dara Bahri , Philip Pham , Jinfeng Rao , Liu Yang , Sebastian Ruder , and Donald Metzler . 2021 . Long Range Arena : A Benchmark for Efficient Transformers . In 9th International Conference on Learning Representations, ICLR 2021 , Virtual Event, Austria, May 3--7 , 2021. OpenReview.net. Yi Tay, Mostafa Dehghani, Samira Abnar, Yikang Shen, Dara Bahri, Philip Pham, Jinfeng Rao, Liu Yang, Sebastian Ruder, and Donald Metzler. 2021. Long Range Arena : A Benchmark for Efficient Transformers. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net."},{"key":"e_1_3_2_1_60_1","volume-title":"Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N. Gomez , Lukasz Kaiser , and Illia Polosukhin . 2017 . Attention is All you Need . In Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017 , December 4 --9 , 2017, Long Beach, CA, USA, Isabelle Guyon, Ulrike von Luxburg, Samy Bengio, Hanna M. Wallach, Rob Fergus, S. V. N. Vishwanathan, and Roman Garnett (Eds.). 5998--6008. Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4--9, 2017, Long Beach, CA, USA, Isabelle Guyon, Ulrike von Luxburg, Samy Bengio, Hanna M. Wallach, Rob Fergus, S. V. N. Vishwanathan, and Roman Garnett (Eds.). 5998--6008."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2021.3131609"},{"key":"e_1_3_2_1_62_1","volume-title":"GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In 7th International Conference on Learning Representations, ICLR 2019","author":"Wang Alex","year":"2019","unstructured":"Alex Wang , Amanpreet Singh , Julian Michael , Felix Hill , Omer Levy , and Samuel R. Bowman . 2019 . GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In 7th International Conference on Learning Representations, ICLR 2019 , New Orleans, LA, USA, May 6--9 , 2019 . OpenReview.net. Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R. Bowman. 2019. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6--9, 2019. OpenReview.net."},{"key":"e_1_3_2_1_63_1","volume-title":"SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021","author":"Wang Hanrui","year":"2021","unstructured":"Hanrui Wang , Zhekai Zhang , and Song Han . 2021 . SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021 , Seoul, South Korea, February 27 - March 3, 2021. IEEE, 97--110. Hanrui Wang, Zhekai Zhang, and Song Han. 2021. SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning. In IEEE International Symposium on High-Performance Computer Architecture, HPCA 2021, Seoul, South Korea, February 27 - March 3, 2021. IEEE, 97--110."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3213521"},{"key":"e_1_3_2_1_65_1","volume-title":"Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush.","author":"Wolf Thomas","year":"2020","unstructured":"Thomas Wolf , Lysandre Debut , Victor Sanh , Julien Chaumond , Clement Delangue , Anthony Moi , Pierric Cistac , Tim Rault , R\u00e9mi Louf , Morgan Funtowicz , Joe Davison , Sam Shleifer , Patrick von Platen , Clara Ma , Yacine Jernite , Julien Plu , Canwen Xu , Teven Le Scao , Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. 2020 . Transformers : State-of-the-Art Natural Language Processing. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. Association for Computational Linguistics , Online, 38--45. Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, Joe Davison, Sam Shleifer, Patrick von Platen, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. 2020. Transformers: State-of-the-Art Natural Language Processing. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. Association for Computational Linguistics, Online, 38--45."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00064"},{"key":"e_1_3_2_1_67_1","volume-title":"Sparse Attention Acceleration with Synergistic In-Memory Pruning and On-Chip Recomputation. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022","author":"Yazdanbakhsh Amir","year":"2022","unstructured":"Amir Yazdanbakhsh , Ashkan Moradifirouzabadi , Zheng Li , and Mingu Kang . 2022 . Sparse Attention Acceleration with Synergistic In-Memory Pruning and On-Chip Recomputation. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022 , Chicago, IL, USA, October 1--5 , 2022. IEEE, 744--762. Amir Yazdanbakhsh, Ashkan Moradifirouzabadi, Zheng Li, and Mingu Kang. 2022. Sparse Attention Acceleration with Synergistic In-Memory Pruning and On-Chip Recomputation. In 55th IEEE\/ACM International Symposium on Microarchitecture, MICRO 2022, Chicago, IL, USA, October 1--5, 2022. IEEE, 744--762."},{"key":"e_1_3_2_1_68_1","volume-title":"GOBO: Quantizing Attention-Based NLP Models for Low Latency and Energy Efficient Inference. In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020","author":"Zadeh Ali Hadi","year":"2020","unstructured":"Ali Hadi Zadeh , Isak Edo , Omar Mohamed Awad , and Andreas Moshovos . 2020 . GOBO: Quantizing Attention-Based NLP Models for Low Latency and Energy Efficient Inference. In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020 , Athens, Greece, October 17--21 , 2020. IEEE, 811--824. Ali Hadi Zadeh, Isak Edo, Omar Mohamed Awad, and Andreas Moshovos. 2020. GOBO: Quantizing Attention-Based NLP Models for Low Latency and Energy Efficient Inference. In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2020, Athens, Greece, October 17--21, 2020. IEEE, 811--824."},{"key":"e_1_3_2_1_69_1","volume-title":"ISCA '22: The 49th Annual International Symposium on Computer Architecture","author":"Zadeh Ali Hadi","year":"2022","unstructured":"Ali Hadi Zadeh , Mostafa Mahmoud , Ameer Abdelhadi , and Andreas Moshovos . 2022 . Mokey: enabling narrow fixed-point inference for out-of-the-box floating-point transformer models . In ISCA '22: The 49th Annual International Symposium on Computer Architecture , New York, New York, USA, June 18 -- 22 , 2022, Valentina Salapura, Mohamed Zahran, Fred Chong, and Lingjia Tang (Eds.). ACM, 888--901. Ali Hadi Zadeh, Mostafa Mahmoud, Ameer Abdelhadi, and Andreas Moshovos. 2022. Mokey: enabling narrow fixed-point inference for out-of-the-box floating-point transformer models. In ISCA '22: The 49th Annual International Symposium on Computer Architecture, New York, New York, USA, June 18 -- 22, 2022, Valentina Salapura, Mohamed Zahran, Fred Chong, and Lingjia Tang (Eds.). ACM, 888--901."},{"key":"e_1_3_2_1_70_1","volume-title":"Big Bird: Transformers for Longer Sequences. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Zaheer Manzil","year":"2020","unstructured":"Manzil Zaheer , Guru Guruganesh , Kumar Avinava Dubey , Joshua Ainslie , Chris Alberti , Santiago Onta\u00f1\u00f3n , Philip Pham , Anirudh Ravula , Qifan Wang , Li Yang , and Amr Ahmed . 2020 . Big Bird: Transformers for Longer Sequences. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020 , NeurIPS 2020, December 6--12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.). Manzil Zaheer, Guru Guruganesh, Kumar Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Onta\u00f1\u00f3n, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, and Amr Ahmed. 2020. Big Bird: Transformers for Longer Sequences. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6--12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.)."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"e_1_3_2_1_72_1","volume-title":"Cambricon-Q: A Hybrid Architecture for Efficient Training. In 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021","author":"Zhao Yongwei","year":"2021","unstructured":"Yongwei Zhao , Chang Liu , Zidong Du , Qi Guo , Xing Hu , Yimin Zhuang , Zhenxing Zhang , Xinkai Song , Wei Li , Xishan Zhang , Ling Li , Zhiwei Xu , and Tianshi Chen . 2021 . Cambricon-Q: A Hybrid Architecture for Efficient Training. In 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021 , Valencia, Spain, June 14--18 , 2021. IEEE, 706--719. Yongwei Zhao, Chang Liu, Zidong Du, Qi Guo, Xing Hu, Yimin Zhuang, Zhenxing Zhang, Xinkai Song, Wei Li, Xishan Zhang, Ling Li, Zhiwei Xu, and Tianshi Chen. 2021. Cambricon-Q: A Hybrid Architecture for Efficient Training. In 48th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2021, Valencia, Spain, June 14--18, 2021. IEEE, 706--719."},{"key":"e_1_3_2_1_73_1","volume-title":"Deformable DETR: Deformable Transformers for End-to-End Object Detection. In 9th International Conference on Learning Representations, ICLR 2021","author":"Zhu Xizhou","year":"2021","unstructured":"Xizhou Zhu , Weijie Su , Lewei Lu , Bin Li , Xiaogang Wang , and Jifeng Dai . 2021 . Deformable DETR: Deformable Transformers for End-to-End Object Detection. In 9th International Conference on Learning Representations, ICLR 2021 , Virtual Event, Austria, May 3--7 , 2021. OpenReview.net. Xizhou Zhu, Weijie Su, Lewei Lu, Bin Li, Xiaogang Wang, and Jifeng Dai. 2021. Deformable DETR: Deformable Transformers for End-to-End Object Detection. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net."}],"event":{"name":"ISCA '23: 50th Annual International Symposium on Computer Architecture","location":"Orlando FL USA","acronym":"ISCA '23","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE"]},"container-title":["Proceedings of the 50th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589057","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:38Z","timestamp":1750178798000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589057"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,17]]},"references-count":73,"alternative-id":["10.1145\/3579371.3589057","10.1145\/3579371"],"URL":"https:\/\/doi.org\/10.1145\/3579371.3589057","relation":{},"subject":[],"published":{"date-parts":[[2023,6,17]]},"assertion":[{"value":"2023-06-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}