{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T14:33:22Z","timestamp":1777127602375,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":113,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,11]],"date-time":"2022-06-11T00:00:00Z","timestamp":1654905600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF#2107598"],"award-info":[{"award-number":["CCF#2107598"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Science Foundation","award":["CNS#1822273"],"award-info":[{"award-number":["CNS#1822273"]}]},{"name":"Semiconductor Research Corporation (SRC)","award":["#2021-AH-3039"],"award-info":[{"award-number":["#2021-AH-3039"]}]},{"name":"Defense Advanced Research Project Agency (DARPA)","award":["#HR0011-18-C-0020"],"award-info":[{"award-number":["#HR0011-18-C-0020"]}]},{"name":"National Institute of Health","award":["#R01EB028350"],"award-info":[{"award-number":["#R01EB028350"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,18]]},"DOI":"10.1145\/3470496.3527423","type":"proceedings-article","created":{"date-parts":[[2022,5,31]],"date-time":"2022-05-31T19:06:01Z","timestamp":1654023961000},"page":"902-915","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":41,"title":["Accelerating attention through gradient-based learned runtime pruning"],"prefix":"10.1145","author":[{"given":"Zheng","family":"Li","sequence":"first","affiliation":[{"name":"University of California"}]},{"given":"Soroush","family":"Ghodrati","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Amir","family":"Yazdanbakhsh","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"Hadi","family":"Esmaeilzadeh","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Mingu","family":"Kang","sequence":"additional","affiliation":[{"name":"University of California"}]}],"member":"320","published-online":{"date-parts":[[2022,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2021. AI Winter. https:\/\/en.wikipedia.org\/wiki\/AI_winter. Accessed: 2021-11-08.  2021. AI Winter. https:\/\/en.wikipedia.org\/wiki\/AI_winter. Accessed: 2021-11-08."},{"key":"e_1_3_2_1_2_1","unstructured":"2021. The WikiText Long Term Dependency Language Modeling Dataset. https:\/\/blog.salesforceairesearch.com\/the-wikitext-long-term-dependency-language-modeling-dataset\/. Accessed: 2021-11-08.  2021. The WikiText Long Term Dependency Language Modeling Dataset. https:\/\/blog.salesforceairesearch.com\/the-wikitext-long-term-dependency-language-modeling-dataset\/. Accessed: 2021-11-08."},{"key":"e_1_3_2_1_3_1","unstructured":"2021. Turing Test. https:\/\/en.wikipedia.org\/wiki\/Turing_test. Accessed: 2021-11-08.  2021. Turing Test. https:\/\/en.wikipedia.org\/wiki\/Turing_test. Accessed: 2021-11-08."},{"key":"e_1_3_2_1_4_1","volume-title":"Gupta","author":"Aklaghi Vahide","year":"2018","unstructured":"Vahide Aklaghi , Amir Yazdanbakhsh , Kambiz Samadi , Hadi Esmaeilzadeh , and Rajesh K . Gupta . 2018 . SnaPEA: Predictive Early Activation for Reducing Computation in Deep Convolutional Neural Networks. In ISCA. Vahide Aklaghi, Amir Yazdanbakhsh, Kambiz Samadi, Hadi Esmaeilzadeh, and Rajesh K. Gupta. 2018. SnaPEA: Predictive Early Activation for Reducing Computation in Deep Convolutional Neural Networks. In ISCA."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Jorge Albericio Alberto Delm\u00e1s Patrick Judd Sayeh Sharify Gerard O'Leary Roman Genov and Andreas Moshovos. 2017. Bit-Pragmatic Deep Neural Network Computing. In MICRO.  Jorge Albericio Alberto Delm\u00e1s Patrick Judd Sayeh Sharify Gerard O'Leary Roman Genov and Andreas Moshovos. 2017. Bit-Pragmatic Deep Neural Network Computing. In MICRO.","DOI":"10.1145\/3123939.3123982"},{"key":"e_1_3_2_1_6_1","volume-title":"Natalie Enright Jerger, and Andreas Moshovos","author":"Albericio Jorge","year":"2016","unstructured":"Jorge Albericio , Patrick Judd , Tayler Hetherington , Tor Aamodt , Natalie Enright Jerger, and Andreas Moshovos . 2016 . Cnvlutin : Ineffectual-Neuron-Free Deep Neural Network Computing. In ISCA. Jorge Albericio, Patrick Judd, Tayler Hetherington, Tor Aamodt, Natalie Enright Jerger, and Andreas Moshovos. 2016. Cnvlutin: Ineffectual-Neuron-Free Deep Neural Network Computing. In ISCA."},{"key":"e_1_3_2_1_7_1","unstructured":"ARM. 2021. Artisan Memory Compilers. https:\/\/developer.arm.com\/ip-products\/physical-ip\/embedded-memory. Accessed: 2021-11-08.  ARM. 2021. Artisan Memory Compilers. https:\/\/developer.arm.com\/ip-products\/physical-ip\/embedded-memory. Accessed: 2021-11-08."},{"key":"e_1_3_2_1_8_1","unstructured":"Kambiz Azarian Yash Bhalgat Jinwon Lee and Tijmen Blankevoort. 2020. Learned Threshold Pruning. arXiv preprint arXiv:2003.00075.  Kambiz Azarian Yash Bhalgat Jinwon Lee and Tijmen Blankevoort. 2020. Learned Threshold Pruning. arXiv preprint arXiv:2003.00075."},{"key":"e_1_3_2_1_9_1","volume-title":"Longformer: The Long-Document Transformer. arXiv preprint arXiv:2004.05150.","author":"Beltagy Iz","year":"2020","unstructured":"Iz Beltagy , Matthew E Peters , and Arman Cohan . 2020 . Longformer: The Long-Document Transformer. arXiv preprint arXiv:2004.05150. Iz Beltagy, Matthew E Peters, and Arman Cohan. 2020. Longformer: The Long-Document Transformer. arXiv preprint arXiv:2004.05150."},{"key":"e_1_3_2_1_10_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell etal 2020. Language Models are Few-shot Learners. NeurIPS.  Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language Models are Few-shot Learners. NeurIPS."},{"key":"e_1_3_2_1_11_1","unstructured":"Cadence. 2021. Genus Synthesis Solution. https:\/\/www.cadence.com\/en_US\/home\/tools\/digital-design-and-signoff\/synthesis\/genus-synthesis-solution.html. Accessed: 2021-11-08.  Cadence. 2021. Genus Synthesis Solution. https:\/\/www.cadence.com\/en_US\/home\/tools\/digital-design-and-signoff\/synthesis\/genus-synthesis-solution.html. Accessed: 2021-11-08."},{"key":"e_1_3_2_1_12_1","unstructured":"Cadence. 2021. Innovus Implementation System. https:\/\/www.cadence.com\/en_US\/home\/tools\/digital-design-and-signoff\/soc-implementation-and-floorplanning\/innovus-implementation-system.html. Accessed: 2021-11-08.  Cadence. 2021. Innovus Implementation System. https:\/\/www.cadence.com\/en_US\/home\/tools\/digital-design-and-signoff\/soc-implementation-and-floorplanning\/innovus-implementation-system.html. Accessed: 2021-11-08."},{"key":"e_1_3_2_1_13_1","unstructured":"Tianlong Chen Yu Cheng Zhe Gan Lu Yuan Lei Zhang and Zhangyang Wang. 2021. Chasing Sparsity in Vision Transformers: An End-to-End Exploration. NeurIPS.  Tianlong Chen Yu Cheng Zhe Gan Lu Yuan Lei Zhang and Zhangyang Wang. 2021. Chasing Sparsity in Vision Transformers: An End-to-End Exploration. NeurIPS."},{"key":"e_1_3_2_1_14_1","unstructured":"Tianlong Chen Yu Cheng Zhe Gan Lu Yuan Lei Zhang and Zhangyang Wang. 2021. Chasing Sparsity in Vision Transformers:An End-to-End Exploration. arXiv:2106.04533  Tianlong Chen Yu Cheng Zhe Gan Lu Yuan Lei Zhang and Zhangyang Wang. 2021. Chasing Sparsity in Vision Transformers:An End-to-End Exploration. arXiv:2106.04533"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Yunji Chen Tao Luo Shaoli Liu Shijin Zhang Liqiang He Jia Wang Ling Li Tianshi Chen Zhiwei Xu Ninghui Sun etal 2014. DaDianNao: A Machine- Learning Supercomputer. In MICRO.  Yunji Chen Tao Luo Shaoli Liu Shijin Zhang Liqiang He Jia Wang Ling Li Tianshi Chen Zhiwei Xu Ninghui Sun et al. 2014. DaDianNao: A Machine- Learning Supercomputer. In MICRO.","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_17_1","volume-title":"Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices. JETCAS","author":"Chen Yu-Hsin","year":"2019","unstructured":"Yu-Hsin Chen , Tien-Ju Yang , Joel Emer , and Vivienne Sze . 2019. Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices. JETCAS ( 2019 ). Yu-Hsin Chen, Tien-Ju Yang, Joel Emer, and Vivienne Sze. 2019. Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices. JETCAS (2019)."},{"key":"e_1_3_2_1_18_1","volume-title":"Prime: A Novel Processing-in-Memory Architecture for Neural Network Computation in ReRAM-based Main Memory. In ISCA.","author":"Chi Ping","year":"2016","unstructured":"Ping Chi , Shuangchen Li , Cong Xu , Tao Zhang , Jishen Zhao , Yongpan Liu , Yu Wang , and Yuan Xie . 2016 . Prime: A Novel Processing-in-Memory Architecture for Neural Network Computation in ReRAM-based Main Memory. In ISCA. Ping Chi, Shuangchen Li, Cong Xu, Tao Zhang, Jishen Zhao, Yongpan Liu, Yu Wang, and Yuan Xie. 2016. Prime: A Novel Processing-in-Memory Architecture for Neural Network Computation in ReRAM-based Main Memory. In ISCA."},{"key":"e_1_3_2_1_19_1","unstructured":"Rewon Child Scott Gray Alec Radford and Ilya Sutskever. 2019. Generating Long Sequences with Sparse Transformers. arXiv preprint arXiv:1904.10509.  Rewon Child Scott Gray Alec Radford and Ilya Sutskever. 2019. Generating Long Sequences with Sparse Transformers. arXiv preprint arXiv:1904.10509."},{"key":"e_1_3_2_1_20_1","unstructured":"Krzysztof Choromanski Valerii Likhosherstov David Dohan Xingyou Song Andreea Gane Tamas Sarlos Peter Hawkins Jared Davis Afroz Mohiuddin Lukasz Kaiser etal 2020. Rethinking Attention with Performers. arXiv preprint arXiv:2009.14794.  Krzysztof Choromanski Valerii Likhosherstov David Dohan Xingyou Song Andreea Gane Tamas Sarlos Peter Hawkins Jared Davis Afroz Mohiuddin Lukasz Kaiser et al. 2020. Rethinking Attention with Performers. arXiv preprint arXiv:2009.14794."},{"key":"e_1_3_2_1_21_1","volume-title":"Adaptively sparse transformers. arXiv preprint arXiv:1909.00015","author":"Correia Gon\u00e7alo M","year":"2019","unstructured":"Gon\u00e7alo M Correia , Vlad Niculae , and Andr\u00e9 FT Martins . 2019. Adaptively sparse transformers. arXiv preprint arXiv:1909.00015 ( 2019 ). Gon\u00e7alo M Correia, Vlad Niculae, and Andr\u00e9 FT Martins. 2019. Adaptively sparse transformers. arXiv preprint arXiv:1909.00015 (2019)."},{"key":"e_1_3_2_1_22_1","unstructured":"Baiyun Cui Yingming Li Ming Chen and Zhongfei Zhang. 2019. Fine-Tune BERT with Sparse Self-Attention Mechanism. In EMNLP-IJCNLP.  Baiyun Cui Yingming Li Ming Chen and Zhongfei Zhang. 2019. Fine-Tune BERT with Sparse Self-Attention Mechanism. In EMNLP-IJCNLP."},{"key":"e_1_3_2_1_23_1","unstructured":"Zihang Dai Zhilin Yang Yiming Yang Jaime Carbonell Quoc V Le and Ruslan Salakhutdinov. 2019. Transformer-XL: Attentive Language Models Beyond a Fixed-length Context. arXiv preprint arXiv:1901.02860.  Zihang Dai Zhilin Yang Yiming Yang Jaime Carbonell Quoc V Le and Ruslan Salakhutdinov. 2019. Transformer-XL: Attentive Language Models Beyond a Fixed-length Context. arXiv preprint arXiv:1901.02860."},{"key":"e_1_3_2_1_24_1","volume-title":"Zissis Poulos, Mostafa Mahmoud, Sayeh Sharify, Milos Nikolic, Kevin Siu, and Andreas Moshovos.","author":"Lascorz Alberto Delmas","year":"2019","unstructured":"Alberto Delmas Lascorz , Patrick Judd , Dylan Malone Stuart , Zissis Poulos, Mostafa Mahmoud, Sayeh Sharify, Milos Nikolic, Kevin Siu, and Andreas Moshovos. 2019 . Bit-Tactical: A Software\/Hardware Approach to Exploiting Value and Bit Sparsity in Neural Networks. In ASPLOS. Alberto Delmas Lascorz, Patrick Judd, Dylan Malone Stuart, Zissis Poulos, Mostafa Mahmoud, Sayeh Sharify, Milos Nikolic, Kevin Siu, and Andreas Moshovos. 2019. Bit-Tactical: A Software\/Hardware Approach to Exploiting Value and Bit Sparsity in Neural Networks. In ASPLOS."},{"key":"e_1_3_2_1_25_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly Jakob Uszkoreit and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In ICLR.  Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly Jakob Uszkoreit and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In ICLR."},{"key":"e_1_3_2_1_26_1","volume-title":"Neural Cache: Bit-Serial In-Cache Acceleration of Deep Neural Networks. In ISCA.","author":"Eckert Charles","year":"2018","unstructured":"Charles Eckert , Xiaowei Wang , Jingcheng Wang , Arun Subramaniyan , Ravi Iyer , Dennis Sylvester , David Blaaauw , and Reetuparna Das . 2018 . Neural Cache: Bit-Serial In-Cache Acceleration of Deep Neural Networks. In ISCA. Charles Eckert, Xiaowei Wang, Jingcheng Wang, Arun Subramaniyan, Ravi Iyer, Dennis Sylvester, David Blaaauw, and Reetuparna Das. 2018. Neural Cache: Bit-Serial In-Cache Acceleration of Deep Neural Networks. In ISCA."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Jeremy Fowers Kalin Ovtcharov Michael Papamichael Todd Massengill Ming Liu Daniel Lo Shlomi Alkalay Michael Haselman Logan Adams Mahdi Ghandi etal 2018. A Configurable Cloud-Scale DNN Processor for Real-Time AI. In ISCA.  Jeremy Fowers Kalin Ovtcharov Michael Papamichael Todd Massengill Ming Liu Daniel Lo Shlomi Alkalay Michael Haselman Logan Adams Mahdi Ghandi et al. 2018. A Configurable Cloud-Scale DNN Processor for Real-Time AI. In ISCA.","DOI":"10.1109\/ISCA.2018.00012"},{"key":"e_1_3_2_1_28_1","unstructured":"Trevor Gale Erich Elsen and Sara Hooker. 2019. The State of Sparsity in Deep Neural Networks. arXiv preprint arXiv:1902.09574.  Trevor Gale Erich Elsen and Sara Hooker. 2019. The State of Sparsity in Deep Neural Networks. arXiv preprint arXiv:1902.09574."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304014"},{"key":"e_1_3_2_1_31_1","volume-title":"Joon Kyung Kim, Sean Kinzer, Brahmendra Reddy Yatham, Navateja Alla, Hardik Sharma, Mohammad Alian, Eiman Ebrahimi, Nam Sung Kim, et al.","author":"Ghodrati Soroush","year":"2020","unstructured":"Soroush Ghodrati , Byung Hoon Ahn , Joon Kyung Kim, Sean Kinzer, Brahmendra Reddy Yatham, Navateja Alla, Hardik Sharma, Mohammad Alian, Eiman Ebrahimi, Nam Sung Kim, et al. 2020 . Planaria : Dynamic Architecture Fission for Spatial Multi-Tenant Acceleration of Deep Neural Networks. In MICRO. Soroush Ghodrati, Byung Hoon Ahn, Joon Kyung Kim, Sean Kinzer, Brahmendra Reddy Yatham, Navateja Alla, Hardik Sharma, Mohammad Alian, Eiman Ebrahimi, Nam Sung Kim, et al. 2020. Planaria: Dynamic Architecture Fission for Spatial Multi-Tenant Acceleration of Deep Neural Networks. In MICRO."},{"key":"e_1_3_2_1_32_1","volume-title":"Doug Burger, and Hadi Esmaeilzadeh.","author":"Ghodrati Soroush","year":"2020","unstructured":"Soroush Ghodrati , Hardik Sharma , Sean Kinzer , Amir Yazdanbakhsh , Jongse Park , Nam Sung Kim , Doug Burger, and Hadi Esmaeilzadeh. 2020 . Mixed-Signal Charge-Domain Acceleration of Deep Neural networks through Interleaved Bit-Partitioned Arithmetic. In PACT. Soroush Ghodrati, Hardik Sharma, Sean Kinzer, Amir Yazdanbakhsh, Jongse Park, Nam Sung Kim, Doug Burger, and Hadi Esmaeilzadeh. 2020. Mixed-Signal Charge-Domain Acceleration of Deep Neural networks through Interleaved Bit-Partitioned Arithmetic. In PACT."},{"key":"e_1_3_2_1_33_1","volume-title":"Nam Sung Kim, and Hadi Esmaeilzadeh","author":"Ghodrati Soroush","year":"2020","unstructured":"Soroush Ghodrati , Hardik Sharma , Cliff Young , Nam Sung Kim, and Hadi Esmaeilzadeh . 2020 . Bit-Parallel Vector Composability for Neural Acceleration. In DAC. Soroush Ghodrati, Hardik Sharma, Cliff Young, Nam Sung Kim, and Hadi Esmaeilzadeh. 2020. Bit-Parallel Vector Composability for Neural Acceleration. In DAC."},{"key":"e_1_3_2_1_34_1","volume-title":"Seonghak Kim, Young H Oh, Yeonhong Park, Yoonho Song, Jung-Hun Park, Sanghee Lee, Kyoung Park, Jae W Lee, et al.","author":"Ham Tae Jun","year":"2020","unstructured":"Tae Jun Ham , Sung Jun Jung , Seonghak Kim, Young H Oh, Yeonhong Park, Yoonho Song, Jung-Hun Park, Sanghee Lee, Kyoung Park, Jae W Lee, et al. 2020 . A\u02c63: Accelerating Attention Mechanisms in Neural Networks with Approximation. In HPCA. Tae Jun Ham, Sung Jun Jung, Seonghak Kim, Young H Oh, Yeonhong Park, Yoonho Song, Jung-Hun Park, Sanghee Lee, Kyoung Park, Jae W Lee, et al. 2020. A\u02c63: Accelerating Attention Mechanisms in Neural Networks with Approximation. In HPCA."},{"key":"e_1_3_2_1_35_1","volume-title":"Soosung Kim, Hyunji Choi, Sung Jun Jung, and Jae W Lee.","author":"Ham Tae Jun","year":"2021","unstructured":"Tae Jun Ham , Yejin Lee , Seong Hoon Seo , Soosung Kim, Hyunji Choi, Sung Jun Jung, and Jae W Lee. 2021 . ELSA : Hardware-Software Co-design for Efficient, Lightweight Self-Attention Mechanism in Neural Networks. In ISCA. Tae Jun Ham, Yejin Lee, Seong Hoon Seo, Soosung Kim, Hyunji Choi, Sung Jun Jung, and Jae W Lee. 2021. ELSA: Hardware-Software Co-design for Efficient, Lightweight Self-Attention Mechanism in Neural Networks. In ISCA."},{"key":"e_1_3_2_1_36_1","volume":"201","author":"Han Song","unstructured":"Song Han , Xingyu Liu , Huizi Mao , Jing Pu , Ardavan Pedram , Mark A Horowitz , and William J Dally. 201 6. EIE: Efficient Inference Engine on Compressed Deep Neural Network. In ISCA. Song Han, Xingyu Liu, Huizi Mao, Jing Pu, Ardavan Pedram, Mark A Horowitz, and William J Dally. 2016. EIE: Efficient Inference Engine on Compressed Deep Neural Network. In ISCA.","journal-title":"William J Dally."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Shehzeen Hussain Mojan Javaheripi Paarth Neekhara Ryan Kastner and Fari- naz Koushanfar. 2019. FastWave: Accelerating Autoregressive Convolutional Neural Networks on FPGA. In ICCAD.  Shehzeen Hussain Mojan Javaheripi Paarth Neekhara Ryan Kastner and Fari- naz Koushanfar. 2019. FastWave: Accelerating Autoregressive Convolutional Neural Networks on FPGA. In ICCAD.","DOI":"10.1109\/ICCAD45719.2019.8942122"},{"key":"e_1_3_2_1_38_1","unstructured":"Norman P Jouppi Cliff Young Nishant Patil David Patterson Gaurav Agrawal Raminder Bajwa Sarah Bates Suresh Bhatia Nan Boden Al Borchers etal 2017. In-Datacenter Performance Analysis of a Tensor Processing Unit. In ISCA.  Norman P Jouppi Cliff Young Nishant Patil David Patterson Gaurav Agrawal Raminder Bajwa Sarah Bates Suresh Bhatia Nan Boden Al Borchers et al. 2017. In-Datacenter Performance Analysis of a Tensor Processing Unit. In ISCA."},{"key":"e_1_3_2_1_39_1","volume-title":"Stripes: Bit-serial Deep Neural Network Computing. In MICRO.","author":"Judd Patrick","year":"2016","unstructured":"Patrick Judd , Jorge Albericio , Tayler Hetherington , Tor M Aamodt , and Andreas Moshovos . 2016 . Stripes: Bit-serial Deep Neural Network Computing. In MICRO. Patrick Judd, Jorge Albericio, Tayler Hetherington, Tor M Aamodt, and Andreas Moshovos. 2016. Stripes: Bit-serial Deep Neural Network Computing. In MICRO."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"John Jumper Richard Evans Alexander Pritzel Tim Green Michael Figurnov Olaf Ronneberger Kathryn Tunyasuvunakool Russ Bates Augustin \u017d\u00eddek Anna Potapenko etal 2021. Highly Accurate Protein Structure Prediction with AlphaFold. Nature (2021).  John Jumper Richard Evans Alexander Pritzel Tim Green Michael Figurnov Olaf Ronneberger Kathryn Tunyasuvunakool Russ Bates Augustin \u017d\u00eddek Anna Potapenko et al. 2021. Highly Accurate Protein Structure Prediction with AlphaFold. Nature (2021).","DOI":"10.1038\/s41586-021-03819-2"},{"key":"e_1_3_2_1_41_1","unstructured":"Sheng-Chun Kao Suvinay Subramanian Gaurav Agrawal and Tushar Krishna. 2021. An Optimized Dataflow for Mitigating Attention Performance Bottlenecks. arXiv preprint arXiv:2107.06419.  Sheng-Chun Kao Suvinay Subramanian Gaurav Agrawal and Tushar Krishna. 2021. An Optimized Dataflow for Mitigating Attention Performance Bottlenecks. arXiv preprint arXiv:2107.06419."},{"key":"e_1_3_2_1_42_1","unstructured":"Angelos Katharopoulos Apoorv Vyas Nikolaos Pappas and Fran\u00e7ois Fleuret. 2020. Transformers are RNNs: Fast AutoRegressive Transformers with Linear Attention. In ICML.  Angelos Katharopoulos Apoorv Vyas Nikolaos Pappas and Fran\u00e7ois Fleuret. 2020. Transformers are RNNs: Fast AutoRegressive Transformers with Linear Attention. In ICML."},{"key":"e_1_3_2_1_43_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT.","author":"Ming-Wei Chang Jacob Devlin","year":"2019","unstructured":"Jacob Devlin Ming-Wei Chang Kenton and Lee Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT. Jacob Devlin Ming-Wei Chang Kenton and Lee Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001178"},{"key":"e_1_3_2_1_45_1","volume-title":"Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980.","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba . 2014 . Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980. Diederik P Kingma and Jimmy Ba. 2014. Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980."},{"key":"e_1_3_2_1_46_1","volume-title":"Reformer: The Efficient Transformer. arXiv preprint arXiv:2001.04451.","author":"Kitaev Nikita","year":"2020","unstructured":"Nikita Kitaev , \u0141ukasz Kaiser , and Anselm Levskaya . 2020 . Reformer: The Efficient Transformer. arXiv preprint arXiv:2001.04451. Nikita Kitaev, \u0141ukasz Kaiser, and Anselm Levskaya. 2020. Reformer: The Efficient Transformer. arXiv preprint arXiv:2001.04451."},{"key":"e_1_3_2_1_47_1","volume-title":"Kremer","author":"Kolen John F.","year":"2001","unstructured":"John F. Kolen and Stefan C . Kremer . 2001 . Gradient Flow in Recurrent Nets : The Difficulty of Learning LongTerm Dependencies . John F. Kolen and Stefan C. Kremer. 2001. Gradient Flow in Recurrent Nets: The Difficulty of Learning LongTerm Dependencies."},{"key":"e_1_3_2_1_48_1","volume-title":"Learning Multiple Layers of Features from Tiny Images. Computer Science Department","author":"Krizhevsky Alex","year":"2009","unstructured":"Alex Krizhevsky and Geoffrey Hinton . 2009. Learning Multiple Layers of Features from Tiny Images. Computer Science Department , University of Toronto , Tech. Rep ( 2009 ). Alex Krizhevsky and Geoffrey Hinton. 2009. Learning Multiple Layers of Features from Tiny Images. Computer Science Department, University of Toronto, Tech. Rep (2009)."},{"key":"e_1_3_2_1_49_1","unstructured":"Anders Krogh and John A Hertz. 1992. A Simple Weight Decay can Improve Generalization. In NIPS.  Anders Krogh and John A Hertz. 1992. A Simple Weight Decay can Improve Generalization. In NIPS."},{"key":"e_1_3_2_1_50_1","volume-title":"On Information and Sufficiency. The annals of mathematical statistics","author":"Kullback Solomon","year":"1951","unstructured":"Solomon Kullback and Richard A Leibler . 1951. On Information and Sufficiency. The annals of mathematical statistics ( 1951 ). Solomon Kullback and Richard A Leibler. 1951. On Information and Sufficiency. The annals of mathematical statistics (1951)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Hyoukjun Kwon Prasanth Chatarasi Michael Pellauer Angshuman Parashar Vivek Sarkar and Tushar Krishna. 2019. Understanding Reuse Performance and Hardware Cost of DNN Dataflow: A Data-Centric Approach. In MICRO.  Hyoukjun Kwon Prasanth Chatarasi Michael Pellauer Angshuman Parashar Vivek Sarkar and Tushar Krishna. 2019. Understanding Reuse Performance and Hardware Cost of DNN Dataflow: A Data-Centric Approach. In MICRO.","DOI":"10.1145\/3352460.3358252"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173176"},{"key":"e_1_3_2_1_53_1","volume-title":"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In ICLR.","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan , Mingda Chen , Sebastian Goodman , Kevin Gimpel , Piyush Sharma , and Radu Soricut . 2019 . ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In ICLR. Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In ICLR."},{"key":"e_1_3_2_1_54_1","unstructured":"Dongwoo Lee Sungbum Kang and Kiyoung Choi. 2018. ComPEND: Computation Pruning through Early Negative Detection for ReLU in a deep neural network accelerator. In ICS.  Dongwoo Lee Sungbum Kang and Kiyoung Choi. 2018. ComPEND: Computation Pruning through Early Negative Detection for ReLU in a deep neural network accelerator. In ICS."},{"key":"e_1_3_2_1_55_1","volume-title":"UNPU: A 50.6 TOPS\/W Unified Deep Neural Network Accelerator with 1b-to-16b Fully-Variable Weight Bit-Precision. In ISSCC.","author":"Lee Jinmook","year":"2018","unstructured":"Jinmook Lee , Changhyeon Kim , Sanghoon Kang , Dongjoo Shin , Sangyeob Kim , and Hoi-Jun Yoo . 2018 . UNPU: A 50.6 TOPS\/W Unified Deep Neural Network Accelerator with 1b-to-16b Fully-Variable Weight Bit-Precision. In ISSCC. Jinmook Lee, Changhyeon Kim, Sanghoon Kang, Dongjoo Shin, Sangyeob Kim, and Hoi-Jun Yoo. 2018. UNPU: A 50.6 TOPS\/W Unified Deep Neural Network Accelerator with 1b-to-16b Fully-Variable Weight Bit-Precision. In ISSCC."},{"key":"e_1_3_2_1_56_1","unstructured":"Yingyan Lin Charbel Sakr Yongjune Kim and Naresh Shanbhag. 2017. PredictiveNet: An Energy-Efficient Convolutional Neural Network via Zero Prediction. In ISCAS.  Yingyan Lin Charbel Sakr Yongjune Kim and Naresh Shanbhag. 2017. PredictiveNet: An Energy-Efficient Convolutional Neural Network via Zero Prediction. In ISCAS."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001179"},{"key":"e_1_3_2_1_58_1","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. arXiv preprint arXiv:1907.11692.  Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. arXiv preprint arXiv:1907.11692."},{"key":"e_1_3_2_1_59_1","unstructured":"Christos Louizos Max Welling and Diederik P Kingma. 2017. Learning Sparse Neural Networks through L_0 Regularization. arXiv preprint arXiv:1712.01312.  Christos Louizos Max Welling and Diederik P Kingma. 2017. Learning Sparse Neural Networks through L _0 Regularization. arXiv preprint arXiv:1712.01312."},{"key":"e_1_3_2_1_60_1","volume-title":"Sanger: A Co-Design Framework for Enabling Sparse Attention using Reconfigurable Architecture. In MICRO.","author":"Lu Liqiang","year":"2021","unstructured":"Liqiang Lu , Yicheng Jin , Hangrui Bi , Zizhang Luo , Peng Li , Tao Wang , and Yun Liang . 2021 . Sanger: A Co-Design Framework for Enabling Sparse Attention using Reconfigurable Architecture. In MICRO. Liqiang Lu, Yicheng Jin, Hangrui Bi, Zizhang Luo, Peng Li, Tao Wang, and Yun Liang. 2021. Sanger: A Co-Design Framework for Enabling Sparse Attention using Reconfigurable Architecture. In MICRO."},{"key":"e_1_3_2_1_61_1","unstructured":"Paul Michel Omer Levy and Graham Neubig. 2019. Are Sixteen Heads Really Better than One? arXiv preprint arXiv:1905.10650.  Paul Michel Omer Levy and Graham Neubig. 2019. Are Sixteen Heads Really Better than One? arXiv preprint arXiv:1905.10650."},{"key":"e_1_3_2_1_62_1","volume-title":"Machine Learning: A Probabilistic Perspective","author":"Murphy Kevin P","year":"2012","unstructured":"Kevin P Murphy . 2012 . Machine Learning: A Probabilistic Perspective . MIT press . Kevin P Murphy. 2012. Machine Learning: A Probabilistic Perspective. MIT press."},{"key":"e_1_3_2_1_63_1","volume":"201","author":"Parashar Angshuman","unstructured":"Angshuman Parashar , Minsoo Rhu , Anurag Mukkara , Antonio Puglielli , Rang- harajan Venkatesan, Brucek Khailany , Joel Emer , Stephen W Keckler , and William J Dally. 201 7. SCNN: An Accelerator for Compressed-sparse Con- volutional Neural Networks. In ISCA. Angshuman Parashar, Minsoo Rhu, Anurag Mukkara, Antonio Puglielli, Rang- harajan Venkatesan, Brucek Khailany, Joel Emer, Stephen W Keckler, and William J Dally. 2017. SCNN: An Accelerator for Compressed-sparse Con- volutional Neural Networks. In ISCA.","journal-title":"William J Dally."},{"key":"e_1_3_2_1_64_1","volume-title":"OPTIMUS: OPTImized matrix MUltiplication Structure for Transformer neural network accelerator. In MLSys.","author":"Park Junki","year":"2020","unstructured":"Junki Park , Hyunsung Yoon , Daehyun Ahn , Jungwook Choi , and Jae-Joon Kim . 2020 . OPTIMUS: OPTImized matrix MUltiplication Structure for Transformer neural network accelerator. In MLSys. Junki Park, Hyunsung Yoon, Daehyun Ahn, Jungwook Choi, and Jae-Joon Kim. 2020. OPTIMUS: OPTImized matrix MUltiplication Structure for Transformer neural network accelerator. In MLSys."},{"key":"e_1_3_2_1_65_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke , Sam Gross , Francisco Massa , Adam Lerer , James Bradbury , Gregory Chanan , Trevor Killeen , Zeming Lin , Natalia Gimelshein , Luca Antiga , Alban Desmaison , Andreas Kopf , Edward Yang , Zachary DeVito , Martin Raison , Alykhan Tejani , Sasank Chilamkurthy , Benoit Steiner , Lu Fang , Junjie Bai , and Soumith Chintala . 2019. PyTorch: An Imperative Style , High-Performance Deep Learning Library . In NeurIPS. Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In NeurIPS."},{"key":"e_1_3_2_1_66_1","volume-title":"SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. In HPCA.","author":"Qin Eric","year":"2020","unstructured":"Eric Qin , Ananda Samajdar , Hyoukjun Kwon , Vineet Nadella , Sudarshan Srinivasan , Dipankar Das , Bharat Kaul , and Tushar Krishna . 2020 . SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. In HPCA. Eric Qin, Ananda Samajdar, Hyoukjun Kwon, Vineet Nadella, Sudarshan Srinivasan, Dipankar Das, Bharat Kaul, and Tushar Krishna. 2020. SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. In HPCA."},{"key":"e_1_3_2_1_67_1","volume-title":"Sinong Wang, and Jie Tang.","author":"Qiu Jiezhong","year":"2019","unstructured":"Jiezhong Qiu , Hao Ma , Omer Levy , Scott Wen-tau Yih , Sinong Wang, and Jie Tang. 2019 . Blockwise Self-Attention for Long Document Understanding . arXiv preprint arXiv:1911.02972. Jiezhong Qiu, Hao Ma, Omer Levy, Scott Wen-tau Yih, Sinong Wang, and Jie Tang. 2019. Blockwise Self-Attention for Long Document Understanding. arXiv preprint arXiv:1911.02972."},{"key":"e_1_3_2_1_68_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever etal 2019. Language Models are Unsupervised Multitask Learners. OpenAI blog (2019).  Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language Models are Unsupervised Multitask Learners. OpenAI blog (2019)."},{"key":"e_1_3_2_1_69_1","volume":"201","author":"Raffel Colin","unstructured":"Colin Raffel , Noam Shazeer , Adam Roberts , Katherine Lee , Sharan Narang , Michael Matena , Yanqi Zhou , Wei Li , and Peter J Liu. 201 9. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. arXiv preprint arXiv:1910.10683. Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J Liu. 2019. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. arXiv preprint arXiv:1910.10683.","journal-title":"Peter J Liu."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"crossref","unstructured":"Pranav Rajpurkar Jian Zhang Konstantin Lopyrev and Percy Liang. 2016. SQUAD: 100 000+ Questions for Machine Comprehension of Text. arXiv preprint arXiv:1606.05250.  Pranav Rajpurkar Jian Zhang Konstantin Lopyrev and Percy Liang. 2016. SQUAD: 100 000+ Questions for Machine Comprehension of Text. arXiv preprint arXiv:1606.05250.","DOI":"10.18653\/v1\/D16-1264"},{"key":"e_1_3_2_1_71_1","volume-title":"Jos\u00e9 Miguel Hern\u00e1ndez-Lobato, Gu-Yeon Wei, and David Brooks.","author":"Reagen Brandon","year":"2016","unstructured":"Brandon Reagen , Paul Whatmough , Robert Adolf , Saketh Rama , Hyunkwang Lee , Sae Kyu Lee , Jos\u00e9 Miguel Hern\u00e1ndez-Lobato, Gu-Yeon Wei, and David Brooks. 2016 . Minerva : Enabling Low-Power, Highly-Accurate Deep Neural Network Accelerators. In ISCA. Brandon Reagen, Paul Whatmough, Robert Adolf, Saketh Rama, Hyunkwang Lee, Sae Kyu Lee, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato, Gu-Yeon Wei, and David Brooks. 2016. Minerva: Enabling Low-Power, Highly-Accurate Deep Neural Network Accelerators. In ISCA."},{"key":"e_1_3_2_1_72_1","volume-title":"A Stochastic Approximation Method. The annals of mathematical statistics","author":"Robbins Herbert","year":"1951","unstructured":"Herbert Robbins and Sutton Monro . 1951. A Stochastic Approximation Method. The annals of mathematical statistics ( 1951 ). Herbert Robbins and Sutton Monro. 1951. A Stochastic Approximation Method. The annals of mathematical statistics (1951)."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"crossref","unstructured":"Bita Darvish Rouhani Mohammad Samragh Mojan Javaheripi Tara Javidi and Farinaz Koushanfar. 2018. DeepFense: Online Accelerated Defense against Adversarial Deep Learning. In ICCAD.  Bita Darvish Rouhani Mohammad Samragh Mojan Javaheripi Tara Javidi and Farinaz Koushanfar. 2018. DeepFense: Online Accelerated Defense against Adversarial Deep Learning. In ICCAD.","DOI":"10.1145\/3240765.3240791"},{"key":"e_1_3_2_1_74_1","volume-title":"Efficient Content-based Sparse Attention with Routing Transformers. Transactions of the Association for Computational Linguistics","author":"Roy Aurko","year":"2021","unstructured":"Aurko Roy , Mohammad Saffar , Ashish Vaswani , and David Grangier . 2021. Efficient Content-based Sparse Attention with Routing Transformers. Transactions of the Association for Computational Linguistics ( 2021 ). Aurko Roy, Mohammad Saffar, Ashish Vaswani, and David Grangier. 2021. Efficient Content-based Sparse Attention with Routing Transformers. Transactions of the Association for Computational Linguistics (2021)."},{"key":"e_1_3_2_1_75_1","volume":"198","author":"Rumelhart D. E.","unstructured":"D. E. Rumelhart , G. E. Hinton , and R. J. Williams. 198 6. Learning Internal Representations by Error Propagation. In Parallel Distributed Processing: Explorations in the Microstructure of Cognition. MIT Press. D. E. Rumelhart, G. E. Hinton, and R. J. Williams. 1986. Learning Internal Representations by Error Propagation. In Parallel Distributed Processing: Explorations in the Microstructure of Cognition. MIT Press.","journal-title":"J. Williams."},{"key":"e_1_3_2_1_76_1","unstructured":"Sungju Ryu Hyungjun Kim Wooseok Yi and Jae-Joon Kim. 2019. BitBlade: Area and Energy-Efficient Precision-Scalable Neural Network Accelerator with Bitwise Summation. In DAC.  Sungju Ryu Hyungjun Kim Wooseok Yi and Jae-Joon Kim. 2019. BitBlade: Area and Energy-Efficient Precision-Scalable Neural Network Accelerator with Bitwise Summation. In DAC."},{"key":"e_1_3_2_1_77_1","volume-title":"EncoDeep: Realizing Bit-Flexible Encoding for Deep Neural Networks. TECS","author":"Samragh Mohammad","year":"2019","unstructured":"Mohammad Samragh , Mojan Javaheripi , and Farinaz Koushanfar . 2019. EncoDeep: Realizing Bit-Flexible Encoding for Deep Neural Networks. TECS ( 2019 ). Mohammad Samragh, Mojan Javaheripi, and Farinaz Koushanfar. 2019. EncoDeep: Realizing Bit-Flexible Encoding for Deep Neural Networks. TECS (2019)."},{"key":"e_1_3_2_1_78_1","volume-title":"Miao Hu, R Stanley Williams, and Vivek Srikumar.","author":"Shafiee Ali","year":"2016","unstructured":"Ali Shafiee , Anirban Nag , Naveen Muralimanohar , Rajeev Balasubramonian , John Paul Strachan , Miao Hu, R Stanley Williams, and Vivek Srikumar. 2016 . ISAAC : A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars. In ISCA. Ali Shafiee, Anirban Nag, Naveen Muralimanohar, Rajeev Balasubramonian, John Paul Strachan, Miao Hu, R Stanley Williams, and Vivek Srikumar. 2016. ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars. In ISCA."},{"key":"e_1_3_2_1_79_1","volume-title":"Simba: Scaling Deep-Learning Inference with Multi-Chip-Module-Based Architecture. In MICRO.","author":"Shao Yakun Sophia","year":"2019","unstructured":"Yakun Sophia Shao , Jason Clemons , Rangharajan Venkatesan , Brian Zimmer , Matthew Fojtik , Nan Jiang , Ben Keller , Alicia Klinefelter , Nathaniel Pinckney , Priyanka Raina , 2019 . Simba: Scaling Deep-Learning Inference with Multi-Chip-Module-Based Architecture. In MICRO. Yakun Sophia Shao, Jason Clemons, Rangharajan Venkatesan, Brian Zimmer, Matthew Fojtik, Nan Jiang, Ben Keller, Alicia Klinefelter, Nathaniel Pinckney, Priyanka Raina, et al. 2019. Simba: Scaling Deep-Learning Inference with Multi-Chip-Module-Based Architecture. In MICRO."},{"key":"e_1_3_2_1_80_1","volume-title":"Mostafa Mahmoud, Milos Nikolic, Kevin Siu, Dylan Malone Stuart, Zissis Poulos, and Andreas Moshovos.","author":"Sharify Sayeh","year":"2019","unstructured":"Sayeh Sharify , Alberto Delmas Lascorz , Mostafa Mahmoud, Milos Nikolic, Kevin Siu, Dylan Malone Stuart, Zissis Poulos, and Andreas Moshovos. 2019 . Laconic Deep Learning Inference Acceleration. In ISCA. Sayeh Sharify, Alberto Delmas Lascorz, Mostafa Mahmoud, Milos Nikolic, Kevin Siu, Dylan Malone Stuart, Zissis Poulos, and Andreas Moshovos. 2019. Laconic Deep Learning Inference Acceleration. In ISCA."},{"key":"e_1_3_2_1_81_1","volume-title":"Kevin Siu, Patrick Judd, and Andreas Moshovos.","author":"Sharify Sayeh","year":"2018","unstructured":"Sayeh Sharify , Alberto Delmas Lascorz , Kevin Siu, Patrick Judd, and Andreas Moshovos. 2018 . Loom : Exploiting Weight and Activation Precisions to Accelerate Convolutional Neural Networks. In DAC. Sayeh Sharify, Alberto Delmas Lascorz, Kevin Siu, Patrick Judd, and Andreas Moshovos. 2018. Loom: Exploiting Weight and Activation Precisions to Accelerate Convolutional Neural Networks. In DAC."},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"crossref","unstructured":"Hardik Sharma Jongse Park Divya Mahajan Emmanuel Amaro Joon Kim Chenkai Shao Asit Misra and Hadi Esmaeilzadeh. 2016. From High-Level Deep Neural Models to FPGAs. In MICRO.  Hardik Sharma Jongse Park Divya Mahajan Emmanuel Amaro Joon Kim Chenkai Shao Asit Misra and Hadi Esmaeilzadeh. 2016. From High-Level Deep Neural Models to FPGAs. In MICRO.","DOI":"10.1109\/MICRO.2016.7783720"},{"key":"e_1_3_2_1_83_1","volume-title":"Bit Fusion: Bit-Level Dynamically Composable Architecture for Accelerating Deep Neural Networks. In ISCA.","author":"Sharma Hardik","year":"2018","unstructured":"Hardik Sharma , Jongse Park , Naveen Suda , Liangzhen Lai , Benson Chau , Vikas Chandra , and Hadi Esmaeilzadeh . 2018 . Bit Fusion: Bit-Level Dynamically Composable Architecture for Accelerating Deep Neural Networks. In ISCA. Hardik Sharma, Jongse Park, Naveen Suda, Liangzhen Lai, Benson Chau, Vikas Chandra, and Hadi Esmaeilzadeh. 2018. Bit Fusion: Bit-Level Dynamically Composable Architecture for Accelerating Deep Neural Networks. In ISCA."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"crossref","unstructured":"Gil Shomron Ron Banner Moran Shkolnik and Uri Weiser. 2020. Thanks for Nothing: Predicting Zero-valued Activations with Lightweight Convolutional Neural Networks. In ECCV.  Gil Shomron Ron Banner Moran Shkolnik and Uri Weiser. 2020. Thanks for Nothing: Predicting Zero-valued Activations with Lightweight Convolutional Neural Networks. In ECCV.","DOI":"10.1007\/978-3-030-58607-2_14"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"crossref","unstructured":"Linghao Song Xuehai Qian Hai Li and Yiran Chen. 2017. PipeLayer: A pipelined ReRAM-based accelerator for deep learning. In HPCA.  Linghao Song Xuehai Qian Hai Li and Yiran Chen. 2017. PipeLayer: A pipelined ReRAM-based accelerator for deep learning. In HPCA.","DOI":"10.1109\/HPCA.2017.55"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.61"},{"key":"e_1_3_2_1_87_1","volume-title":"Dropout: A Simple Way to Prevent Neural Networks from Overfitting. The journal of machine learning research","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava , Geoffrey Hinton , Alex Krizhevsky , Ilya Sutskever , and Ruslan Salakhutdinov . 2014 . Dropout: A Simple Way to Prevent Neural Networks from Overfitting. The journal of machine learning research (2014). Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: A Simple Way to Prevent Neural Networks from Overfitting. The journal of machine learning research (2014)."},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"crossref","unstructured":"Ren\u00e9e St. Amant Amir Yazdanbakhsh Jongse Park Bradley Thwaites Hadi Esmaeilzadeh Arjang Hassibi Luis Ceze and Doug Burger. 2014. General-Purpose Code Acceleration with Limited-Precision Analog Computation. In ISCA.  Ren\u00e9e St. Amant Amir Yazdanbakhsh Jongse Park Bradley Thwaites Hadi Esmaeilzadeh Arjang Hassibi Luis Ceze and Doug Burger. 2014. General-Purpose Code Acceleration with Limited-Precision Analog Computation. In ISCA.","DOI":"10.1109\/ISCA.2014.6853213"},{"key":"e_1_3_2_1_89_1","volume-title":"Softermax: Hardware\/Software Co-Design of an Efficient Softmax for Transformers. arXiv preprint arXiv:2103.09301.","author":"Stevens Jacob R","year":"2021","unstructured":"Jacob R Stevens , Rangharajan Venkatesan , Steve Dai , Brucek Khailany , and Anand Raghunathan . 2021 . Softermax: Hardware\/Software Co-Design of an Efficient Softmax for Transformers. arXiv preprint arXiv:2103.09301. Jacob R Stevens, Rangharajan Venkatesan, Steve Dai, Brucek Khailany, and Anand Raghunathan. 2021. Softermax: Hardware\/Software Co-Design of an Efficient Softmax for Transformers. arXiv preprint arXiv:2103.09301."},{"key":"e_1_3_2_1_90_1","volume-title":"Scaling Equations for the Accurate Prediction of CMOS Device Performance from 180 nm to 7 nm. Integration","author":"Stillmaker Aaron","year":"2017","unstructured":"Aaron Stillmaker and Bevan Baas . 2017. Scaling Equations for the Accurate Prediction of CMOS Device Performance from 180 nm to 7 nm. Integration ( 2017 ). Aaron Stillmaker and Bevan Baas. 2017. Scaling Equations for the Accurate Prediction of CMOS Device Performance from 180 nm to 7 nm. Integration (2017)."},{"key":"e_1_3_2_1_91_1","volume-title":"Jason Weston, and Rob Fergus.","author":"Sukhbaatar Sainbayar","year":"2015","unstructured":"Sainbayar Sukhbaatar , arthur szlam , Jason Weston, and Rob Fergus. 2015 . End-To-End Memory Networks. In NIPS. Sainbayar Sukhbaatar, arthur szlam, Jason Weston, and Rob Fergus. 2015. End-To-End Memory Networks. In NIPS."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"crossref","unstructured":"Thierry Tambe Coleman Hooper Lillian Pentecost Tianyu Jia En-Yu Yang Marco Donato Victor Sanh Paul Whatmough Alexander M Rush David Brooks etal 2021. EdgeBERT: Sentence-level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In MICRO.  Thierry Tambe Coleman Hooper Lillian Pentecost Tianyu Jia En-Yu Yang Marco Donato Victor Sanh Paul Whatmough Alexander M Rush David Brooks et al. 2021. EdgeBERT: Sentence-level Energy Optimizations for Latency-Aware Multi-Task NLP Inference. In MICRO.","DOI":"10.1145\/3466752.3480095"},{"key":"e_1_3_2_1_93_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is All You Need. In NeurIPS.  Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is All You Need. In NeurIPS."},{"key":"e_1_3_2_1_94_1","unstructured":"Petar Veli\u010dkovi\u0107 Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Lio and Yoshua Bengio. 2017. Graph Attention Networks. arXiv preprint arXiv:1710.10903.  Petar Veli\u010dkovi\u0107 Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Lio and Yoshua Bengio. 2017. Graph Attention Networks. arXiv preprint arXiv:1710.10903."},{"key":"e_1_3_2_1_95_1","volume-title":"GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. arXiv preprint arXiv:1804.07461.","author":"Wang Alex","year":"2018","unstructured":"Alex Wang , Amanpreet Singh , Julian Michael , Felix Hill , Omer Levy , and Samuel R Bowman . 2018 . GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. arXiv preprint arXiv:1804.07461. Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R Bowman. 2018. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. arXiv preprint arXiv:1804.07461."},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"crossref","unstructured":"Hanrui Wang Zhekai Zhang and Song Han. 2021. SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning. In HPCA.  Hanrui Wang Zhekai Zhang and Song Han. 2021. SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning. In HPCA.","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"e_1_3_2_1_97_1","volume-title":"Linformer: Self-attention with Linear Complexity. arXiv preprint arXiv:2006.04768.","author":"Wang Sinong","year":"2020","unstructured":"Sinong Wang , Belinda Z Li , Madian Khabsa , Han Fang , and Hao Ma . 2020 . Linformer: Self-attention with Linear Complexity. arXiv preprint arXiv:2006.04768. Sinong Wang, Belinda Z Li, Madian Khabsa, Han Fang, and Hao Ma. 2020. Linformer: Self-attention with Linear Complexity. arXiv preprint arXiv:2006.04768."},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"crossref","unstructured":"Ziheng Wang Jeremy Wohlwend and Tao Lei. 2019. Structured Pruning of Large Language Models. arXiv preprint arXiv:1910.04732.  Ziheng Wang Jeremy Wohlwend and Tao Lei. 2019. Structured Pruning of Large Language Models. arXiv preprint arXiv:1910.04732.","DOI":"10.18653\/v1\/2020.emnlp-main.496"},{"key":"e_1_3_2_1_99_1","unstructured":"Wei Wen Yuxiong He Samyam Rajbhandari Minjia Zhang Wenhan Wang Fang Liu Bin Hu Yiran Chen and Hai Li. 2017. Learning Intrinsic Sparse Structures within Long Short-Term Memory. arXiv preprint arXiv:1709.05027.  Wei Wen Yuxiong He Samyam Rajbhandari Minjia Zhang Wenhan Wang Fang Liu Bin Hu Yiran Chen and Hai Li. 2017. Learning Intrinsic Sparse Structures within Long Short-Term Memory. arXiv preprint arXiv:1709.05027."},{"key":"e_1_3_2_1_100_1","volume-title":"Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks. arXiv preprint arXiv:1502.05698","author":"Weston Jason","year":"2015","unstructured":"Jason Weston , Antoine Bordes , Sumit Chopra , Alexander M Rush , Bart van Merri\u00ebnboer , Armand Joulin , and Tomas Mikolov . 2015. Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks. arXiv preprint arXiv:1502.05698 ( 2015 ). Jason Weston, Antoine Bordes, Sumit Chopra, Alexander M Rush, Bart van Merri\u00ebnboer, Armand Joulin, and Tomas Mikolov. 2015. Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks. arXiv preprint arXiv:1502.05698 (2015)."},{"key":"e_1_3_2_1_101_1","doi-asserted-by":"crossref","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz etal 2019. HuggingFace's Transformers: State-of-the-Art Natural Language Processing. arXiv preprint arXiv:1910.03771.  Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz et al. 2019. HuggingFace's Transformers: State-of-the-Art Natural Language Processing. arXiv preprint arXiv:1910.03771.","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_102_1","volume-title":"Nam Sung Kim, and Hadi Esmaeilzadeh","author":"Yazdanbakhsh Amir","year":"2018","unstructured":"Amir Yazdanbakhsh , Michael Brzozowski , Behnam Khaleghi , Soroush Ghodrati , Kambiz Samadi , Nam Sung Kim, and Hadi Esmaeilzadeh . 2018 . FlexiGAN: An End-to-End Solution for FPGA Acceleration of Generative Adversarial Networks. In FCCM. Amir Yazdanbakhsh, Michael Brzozowski, Behnam Khaleghi, Soroush Ghodrati, Kambiz Samadi, Nam Sung Kim, and Hadi Esmaeilzadeh. 2018. FlexiGAN: An End-to-End Solution for FPGA Acceleration of Generative Adversarial Networks. In FCCM."},{"key":"e_1_3_2_1_103_1","volume-title":"GANAX: A Unified SIMD-MIMD Acceleration for Generative Adversarial Network. In ISCA.","author":"Yazdanbakhsh Amir","year":"2018","unstructured":"Amir Yazdanbakhsh , Hajar Falahati , Philip J. Wolfe , Kambiz Samadi , Hadi Esmaeilzadeh , and Nam Sung Kim . 2018 . GANAX: A Unified SIMD-MIMD Acceleration for Generative Adversarial Network. In ISCA. Amir Yazdanbakhsh, Hajar Falahati, Philip J. Wolfe, Kambiz Samadi, Hadi Esmaeilzadeh, and Nam Sung Kim. 2018. GANAX: A Unified SIMD-MIMD Acceleration for Generative Adversarial Network. In ISCA."},{"key":"e_1_3_2_1_104_1","doi-asserted-by":"crossref","unstructured":"Amir Yazdanbakhsh Jongse Park Hardik Sharma Pejman Lotfi-Kamran and Hadi Esmaeilzadeh. 2015. Neural Acceleration for GPU Throughput Processors. In MICRO.  Amir Yazdanbakhsh Jongse Park Hardik Sharma Pejman Lotfi-Kamran and Hadi Esmaeilzadeh. 2015. Neural Acceleration for GPU Throughput Processors. In MICRO.","DOI":"10.1145\/2830772.2830810"},{"key":"e_1_3_2_1_105_1","unstructured":"Amir Yazdanbakhsh Kiran Seshadri Berkin Akin James Laudon and Ravi Narayanaswami. 2021. An Evaluation of Edge TPU Accelerators for Convolutional Neural Networks. arXiv preprint arXiv:2102.10423.  Amir Yazdanbakhsh Kiran Seshadri Berkin Akin James Laudon and Ravi Narayanaswami. 2021. An Evaluation of Edge TPU Accelerators for Convolutional Neural Networks. arXiv preprint arXiv:2102.10423."},{"key":"e_1_3_2_1_106_1","unstructured":"Deming Ye Yankai Lin Yufei Huang and Maosong Sun. 2021. TR-BERT: Dynamic Token Reduction for Accelerating BERT Inference. arXiv preprint arXiv:2105.11618.  Deming Ye Yankai Lin Yufei Huang and Maosong Sun. 2021. TR-BERT: Dynamic Token Reduction for Accelerating BERT Inference. arXiv preprint arXiv:2105.11618."},{"key":"e_1_3_2_1_107_1","unstructured":"Zihao Ye Qipeng Guo Quan Gan Xipeng Qiu and Zheng Zhang. 2019. BP-Transformer: Modelling Long-Range Context via Binary Partitioning. arXiv preprint arXiv:1911.04070.  Zihao Ye Qipeng Guo Quan Gan Xipeng Qiu and Zheng Zhang. 2019. BP-Transformer: Modelling Long-Range Context via Binary Partitioning. arXiv preprint arXiv:1911.04070."},{"key":"e_1_3_2_1_108_1","volume-title":"Omar Mohamed Awad, and Andreas Moshovos","author":"Zadeh Ali Hadi","year":"2020","unstructured":"Ali Hadi Zadeh , Isak Edo , Omar Mohamed Awad, and Andreas Moshovos . 2020 . GOBO : Quantizing Attention-based NLP Models for Low Latency and Energy Efficient Inference. In MICRO. Ali Hadi Zadeh, Isak Edo, Omar Mohamed Awad, and Andreas Moshovos. 2020. GOBO: Quantizing Attention-based NLP Models for Low Latency and Energy Efficient Inference. In MICRO."},{"key":"e_1_3_2_1_109_1","doi-asserted-by":"crossref","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2016. Wide Residual Networks. arXiv preprint arXiv:1605.07146.  Sergey Zagoruyko and Nikos Komodakis. 2016. Wide Residual Networks. arXiv preprint arXiv:1605.07146.","DOI":"10.5244\/C.30.87"},{"key":"e_1_3_2_1_110_1","volume-title":"Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, et al.","author":"Zaheer Manzil","year":"2020","unstructured":"Manzil Zaheer , Guru Guruganesh , Kumar Avinava Dubey , Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, et al. 2020 . Big Bird : Transformers for Longer Sequences. In NeurIPS. Manzil Zaheer, Guru Guruganesh, Kumar Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, et al. 2020. Big Bird: Transformers for Longer Sequences. In NeurIPS."},{"key":"e_1_3_2_1_111_1","doi-asserted-by":"crossref","unstructured":"Shijin Zhang Zidong Du Lei Zhang Huiying Lan Shaoli Liu Ling Li Qi Guo Tianshi Chen and Yunji Chen. 2016. Cambricon-X: An Accelerator for Sparse Neural Networks. In MICRO.  Shijin Zhang Zidong Du Lei Zhang Huiying Lan Shaoli Liu Ling Li Qi Guo Tianshi Chen and Yunji Chen. 2016. Cambricon-X: An Accelerator for Sparse Neural Networks. In MICRO.","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"e_1_3_2_1_112_1","unstructured":"Guangxiang Zhao Junyang Lin Zhiyuan Zhang Xuancheng Ren Qi Su and Xu Sun. 2019. Explicit Sparse Transformer: Concentrated Attention through Explicit Selection. arXiv preprint arXiv:1912.11637.  Guangxiang Zhao Junyang Lin Zhiyuan Zhang Xuancheng Ren Qi Su and Xu Sun. 2019. Explicit Sparse Transformer: Concentrated Attention through Explicit Selection. arXiv preprint arXiv:1912.11637."},{"key":"e_1_3_2_1_113_1","volume-title":"Regularization and Variable Selection via the Elastic Net. Journal of the royal statistical society: series B (statistical methodology)","author":"Zou Hui","year":"2005","unstructured":"Hui Zou and Trevor Hastie . 2005. Regularization and Variable Selection via the Elastic Net. Journal of the royal statistical society: series B (statistical methodology) ( 2005 ). Hui Zou and Trevor Hastie. 2005. Regularization and Variable Selection via the Elastic Net. Journal of the royal statistical society: series B (statistical methodology) (2005)."}],"event":{"name":"ISCA '22: The 49th Annual International Symposium on Computer Architecture","location":"New York New York","acronym":"ISCA '22","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE CS TCAA IEEE CS technical committee on architectural acoustics"]},"container-title":["Proceedings of the 49th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3470496.3527423","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3470496.3527423","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3470496.3527423","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:53Z","timestamp":1750191533000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3470496.3527423"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,11]]},"references-count":113,"alternative-id":["10.1145\/3470496.3527423","10.1145\/3470496"],"URL":"https:\/\/doi.org\/10.1145\/3470496.3527423","relation":{},"subject":[],"published":{"date-parts":[[2022,6,11]]},"assertion":[{"value":"2022-06-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}