{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T17:11:29Z","timestamp":1773249089688,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":99,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,17]],"date-time":"2023-06-17T00:00:00Z","timestamp":1686960000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,17]]},"DOI":"10.1145\/3579371.3589038","type":"proceedings-article","created":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T20:25:28Z","timestamp":1686947128000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":121,"title":["OliVe: Accelerating Large Language Models via Hardware-friendly Outlier-Victim Pair Quantization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4479-5525","authenticated-orcid":false,"given":"Cong","family":"Guo","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4186-6561","authenticated-orcid":false,"given":"Jiaming","family":"Tang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5115-0498","authenticated-orcid":false,"given":"Weiming","family":"Hu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5660-5493","authenticated-orcid":false,"given":"Jingwen","family":"Leng","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2762-2726","authenticated-orcid":false,"given":"Chen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0378-060X","authenticated-orcid":false,"given":"Fan","family":"Yang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7352-8955","authenticated-orcid":false,"given":"Yunxin","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"},{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0034-2302","authenticated-orcid":false,"given":"Minyi","family":"Guo","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2802-0578","authenticated-orcid":false,"given":"Yuhao","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of Rochester, Rochester, New York, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,6,17]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2020. Nvidia ampere architecture whitepaper. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf.  2020. Nvidia ampere architecture whitepaper. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001138"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_4_1","volume-title":"Post training 4-bit quantization of convolutional networks for rapid-deployment. Advances in Neural Information Processing Systems 32","author":"Banner Ron","year":"2019","unstructured":"Ron Banner , Yury Nahshan , and Daniel Soudry . 2019. Post training 4-bit quantization of convolutional networks for rapid-deployment. Advances in Neural Information Processing Systems 32 ( 2019 ). Ron Banner, Yury Nahshan, and Daniel Soudry. 2019. Post training 4-bit quantization of convolutional networks for rapid-deployment. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_5_1","volume-title":"Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432","author":"Bengio Yoshua","year":"2013","unstructured":"Yoshua Bengio , Nicholas L\u00e9onard , and Aaron Courville . 2013. Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432 ( 2013 ). Yoshua Bengio, Nicholas L\u00e9onard, and Aaron Courville. 2013. Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432 (2013)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01318"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00242"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037700"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872368"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2654822.2541967"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/3291168.3291211"},{"key":"e_1_3_2_1_12_1","volume-title":"2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture. IEEE, 609--622","author":"Chen Yunji","year":"2014","unstructured":"Yunji Chen , Tao Luo , Shaoli Liu , Shijin Zhang , Liqiang He , Jia Wang , Ling Li , Tianshi Chen , Zhiwei Xu , Ninghui Sun , 2014 . Dadiannao: A machine-learning supercomputer . In 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture. IEEE, 609--622 . Yunji Chen, Tao Luo, Shaoli Liu, Shijin Zhang, Liqiang He, Jia Wang, Ling Li, Tianshi Chen, Zhiwei Xu, Ninghui Sun, et al. 2014. Dadiannao: A machine-learning supercomputer. In 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture. IEEE, 609--622."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_14_1","volume-title":"Vijayalakshmi Srinivasan, and Kailash Gopalakrishnan.","author":"Choi Jungwook","year":"2018","unstructured":"Jungwook Choi , Zhuo Wang , Swagath Venkataramani , Pierce I-Jen Chuang , Vijayalakshmi Srinivasan, and Kailash Gopalakrishnan. 2018 . Pact : Parameterized clipping activation for quantized neural networks. arXiv preprint arXiv:1805.06085 (2018). Jungwook Choi, Zhuo Wang, Swagath Venkataramani, Pierce I-Jen Chuang, Vijayalakshmi Srinivasan, and Kailash Gopalakrishnan. 2018. Pact: Parameterized clipping activation for quantized neural networks. arXiv preprint arXiv:1805.06085 (2018)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00049"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD46524.2019.00075"},{"key":"e_1_3_2_1_17_1","volume-title":"DVABatch: Diversity-aware Multi-Entry Multi-Exit Batching for Efficient Processing of DNN Services on GPUs. In 2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Cui Weihao","year":"2022","unstructured":"Weihao Cui , Han Zhao , Quan Chen , Hao Wei , Zirui Li , Deze Zeng , Chao Li , and Minyi Guo . 2022 . DVABatch: Diversity-aware Multi-Entry Multi-Exit Batching for Efficient Processing of DNN Services on GPUs. In 2022 USENIX Annual Technical Conference (USENIX ATC 22) . 183--198. Weihao Cui, Han Zhao, Quan Chen, Hao Wei, Zirui Li, Deze Zeng, Chao Li, and Minyi Guo. 2022. DVABatch: Diversity-aware Multi-Entry Multi-Exit Batching for Efficient Processing of DNN Services on GPUs. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). 183--198."},{"key":"e_1_3_2_1_18_1","volume-title":"int8 (): 8-bit matrix multiplication for transformers at scale. arXiv preprint arXiv:2208.07339","author":"Dettmers Tim","year":"2022","unstructured":"Tim Dettmers , Mike Lewis , Younes Belkada , and Luke Zettlemoyer . 2022. Llm. int8 (): 8-bit matrix multiplication for transformers at scale. arXiv preprint arXiv:2208.07339 ( 2022 ). Tim Dettmers, Mike Lewis, Younes Belkada, and Luke Zettlemoyer. 2022. Llm. int8 (): 8-bit matrix multiplication for transformers at scale. arXiv preprint arXiv:2208.07339 (2022)."},{"key":"e_1_3_2_1_19_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2018 . Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018). Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Jesse Dodge Maarten Sap Ana Marasovi\u0107 William Agnew Gabriel Ilharco Dirk Groeneveld Margaret Mitchell and Matt Gardner. 2021. Documenting Large Webtext Corpora: A Case Study on the Colossal Clean Crawled Corpus. arXiv:arXiv:2104.08758  Jesse Dodge Maarten Sap Ana Marasovi\u0107 William Agnew Gabriel Ilharco Dirk Groeneveld Margaret Mitchell and Matt Gardner. 2021. Documenting Large Webtext Corpora: A Case Study on the Colossal Clean Crawled Corpus. arXiv:arXiv:2104.08758","DOI":"10.18653\/v1\/2021.emnlp-main.98"},{"key":"e_1_3_2_1_21_1","volume-title":"Hawq-v2: Hessian aware trace-weighted quantization of neural networks. Advances in neural information processing systems 33","author":"Dong Zhen","year":"2020","unstructured":"Zhen Dong , Zhewei Yao , Daiyaan Arfeen , Amir Gholami , Michael W Mahoney , and Kurt Keutzer . 2020. Hawq-v2: Hessian aware trace-weighted quantization of neural networks. Advances in neural information processing systems 33 ( 2020 ), 18518--18529. Zhen Dong, Zhewei Yao, Daiyaan Arfeen, Amir Gholami, Michael W Mahoney, and Kurt Keutzer. 2020. Hawq-v2: Hessian aware trace-weighted quantization of neural networks. Advances in neural information processing systems 33 (2020), 18518--18529."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision. 293--302","author":"Dong Zhen","year":"2019","unstructured":"Zhen Dong , Zhewei Yao , Amir Gholami , Michael W Mahoney , and Kurt Keutzer . 2019 . Hawq: Hessian aware quantization of neural networks with mixed-precision . In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 293--302 . Zhen Dong, Zhewei Yao, Amir Gholami, Michael W Mahoney, and Kurt Keutzer. 2019. Hawq: Hessian aware quantization of neural networks with mixed-precision. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 293--302."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_24_1","volume-title":"AI and Memory Wall. RiseLab Medium Post","author":"Gholami Amir","year":"2021","unstructured":"Amir Gholami , Zhewei Yao , Sehoon Kim , Michael W Mahoney , and Kurt Keutzer . 2021. AI and Memory Wall. RiseLab Medium Post ( 2021 ). Amir Gholami, Zhewei Yao, Sehoon Kim, Michael W Mahoney, and Kurt Keutzer. 2021. AI and Memory Wall. RiseLab Medium Post (2021)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2014.106"},{"key":"e_1_3_2_1_26_1","volume-title":"How Far Does BERT Look At: Distance-based Clustering and Analysis of BERT ' s Attention. arXiv preprint arXiv:2011.00943","author":"Guan Yue","year":"2020","unstructured":"Yue Guan , Jingwen Leng , Chao Li , Quan Chen , and Minyi Guo . 2020. How Far Does BERT Look At: Distance-based Clustering and Analysis of BERT ' s Attention. arXiv preprint arXiv:2011.00943 ( 2020 ). Yue Guan, Jingwen Leng, Chao Li, Quan Chen, and Minyi Guo. 2020. How Far Does BERT Look At: Distance-based Clustering and Analysis of BERT ' s Attention. arXiv preprint arXiv:2011.00943 (2020)."},{"key":"e_1_3_2_1_27_1","volume-title":"Transkimmer: Transformer Learns to Layer-wise Skim. arXiv preprint arXiv:2205.07324","author":"Guan Yue","year":"2022","unstructured":"Yue Guan , Zhengyi Li , Jingwen Leng , Zhouhan Lin , and Minyi Guo . 2022 . Transkimmer: Transformer Learns to Layer-wise Skim. arXiv preprint arXiv:2205.07324 (2022). Yue Guan, Zhengyi Li, Jingwen Leng, Zhouhan Lin, and Minyi Guo. 2022. Transkimmer: Transformer Learns to Layer-wise Skim. arXiv preprint arXiv:2205.07324 (2022)."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"36","author":"Guan Yue","year":"2022","unstructured":"Yue Guan , Zhengyi Li , Zhouhan Lin , Yuhao Zhu , Jingwen Leng , and Minyi Guo . 2022 . Block-skim: Efficient question answering for transformer . In Proceedings of the AAAI Conference on Artificial Intelligence , Vol. 36 . 10710--10719. Yue Guan, Zhengyi Li, Zhouhan Lin, Yuhao Zhu, Jingwen Leng, and Minyi Guo. 2022. Block-skim: Efficient question answering for transformer. In Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 36. 10710--10719."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433722"},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=JXhROKNZzOc","author":"Guo Cong","year":"2022","unstructured":"Cong Guo , Yuxian Qiu , Jingwen Leng , Xiaotian Gao , Chen Zhang , Yunxin Liu , Fan Yang , Yuhao Zhu , and Minyi Guo . 2022 . SQuant: On-the-Fly Data-Free Quantization via Diagonal Hessian Approximation . In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=JXhROKNZzOc Cong Guo, Yuxian Qiu, Jingwen Leng, Xiaotian Gao, Chen Zhang, Yunxin Liu, Fan Yang, Yuhao Zhu, and Minyi Guo. 2022. SQuant: On-the-Fly Data-Free Quantization via Diagonal Hessian Approximation. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=JXhROKNZzOc"},{"key":"e_1_3_2_1_31_1","volume-title":"Nesting Forward Automatic Differentiation for Memory-Efficient Deep Neural Network Training. In 2022 IEEE 40th International Conference on Computer Design (ICCD). IEEE, 738--745","author":"Guo Cong","year":"2022","unstructured":"Cong Guo , Yuxian Qiu , Jingwen Leng , Chen Zhang , Ying Cao , Quanlu Zhang , Yunxin Liu , Fan Yang , and Minyi Guo . 2022 . Nesting Forward Automatic Differentiation for Memory-Efficient Deep Neural Network Training. In 2022 IEEE 40th International Conference on Computer Design (ICCD). IEEE, 738--745 . Cong Guo, Yuxian Qiu, Jingwen Leng, Chen Zhang, Ying Cao, Quanlu Zhang, Yunxin Liu, Fan Yang, and Minyi Guo. 2022. Nesting Forward Automatic Differentiation for Memory-Efficient Deep Neural Network Training. In 2022 IEEE 40th International Conference on Computer Design (ICCD). IEEE, 738--745."},{"key":"e_1_3_2_1_32_1","volume-title":"ANT: Exploiting Adaptive Numerical Data Type for Low-bit Deep Neural Network Quantization. In 2022 55th IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE, 1414--1433","author":"Guo Cong","year":"2022","unstructured":"Cong Guo , Chen Zhang , Jingwen Leng , Zihan Liu , Fan Yang , Yunxin Liu , Minyi Guo , and Yuhao Zhu . 2022 . ANT: Exploiting Adaptive Numerical Data Type for Low-bit Deep Neural Network Quantization. In 2022 55th IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE, 1414--1433 . Cong Guo, Chen Zhang, Jingwen Leng, Zihan Liu, Fan Yang, Yunxin Liu, Minyi Guo, and Yuhao Zhu. 2022. ANT: Exploiting Adaptive Numerical Data Type for Low-bit Deep Neural Network Quantization. In 2022 55th IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE, 1414--1433."},{"key":"e_1_3_2_1_33_1","volume-title":"ANT github repository. https:\/\/github.com\/clevercool\/ANT_Micro22","author":"Guo Cong","unstructured":"Cong Guo , Chen Zhang , Jingwen Leng , Zihan Liu , Fan Yang , Yunxin Liu , Minyi Guo , and Yuhao Zhu . 2022. ANT github repository. https:\/\/github.com\/clevercool\/ANT_Micro22 . Cong Guo, Chen Zhang, Jingwen Leng, Zihan Liu, Fan Yang, Yunxin Liu, Minyi Guo, and Yuhao Zhu. 2022. ANT github repository. https:\/\/github.com\/clevercool\/ANT_Micro22."},{"key":"e_1_3_2_1_34_1","volume-title":"2020 57th ACM\/IEEE Design Automation Conference (DAC). 1--6.","author":"Guo Cong","year":"2020","unstructured":"Cong Guo , Yangjie Zhou , Jingwen Leng , Yuhao Zhu , Zidong Du , Quan Chen , Chao Li , Bin Yao , and Minyi Guo . 2020 . Balancing Efficiency and Flexibility for DNN Acceleration via Temporal GPU-Systolic Array Integration . In 2020 57th ACM\/IEEE Design Automation Conference (DAC). 1--6. Cong Guo, Yangjie Zhou, Jingwen Leng, Yuhao Zhu, Zidong Du, Quan Chen, Chao Li, Bin Yao, and Minyi Guo. 2020. Balancing Efficiency and Flexibility for DNN Acceleration via Temporal GPU-Systolic Array Integration. In 2020 57th ACM\/IEEE Design Automation Conference (DAC). 1--6."},{"key":"e_1_3_2_1_35_1","volume-title":"International conference on machine learning. PMLR, 1737--1746","author":"Gupta Suyog","year":"2015","unstructured":"Suyog Gupta , Ankur Agrawal , Kailash Gopalakrishnan , and Pritish Narayanan . 2015 . Deep learning with limited numerical precision . In International conference on machine learning. PMLR, 1737--1746 . Suyog Gupta, Ankur Agrawal, Kailash Gopalakrishnan, and Pritish Narayanan. 2015. Deep learning with limited numerical precision. In International conference on machine learning. PMLR, 1737--1746."},{"key":"e_1_3_2_1_36_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han , Huizi Mao , and William J Dally . 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 ( 2015 ). Song Han, Huizi Mao, and William J Dally. 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"e_1_3_2_1_39_1","volume-title":"2019 56th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--6.","author":"Jain Shubham","year":"2019","unstructured":"Shubham Jain , Swagath Venkataramani , Vijayalakshmi Srinivasan , Jungwook Choi , Kailash Gopalakrishnan , and Leland Chang . 2019 . BiScaled-DNN: Quantizing long-tailed datastructures with two scale factors for deep neural networks . In 2019 56th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--6. Shubham Jain, Swagath Venkataramani, Vijayalakshmi Srinivasan, Jungwook Choi, Kailash Gopalakrishnan, and Leland Chang. 2019. BiScaled-DNN: Quantizing long-tailed datastructures with two scale factors for deep neural networks. In 2019 56th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--6."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00010"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00448"},{"key":"e_1_3_2_1_44_1","unstructured":"Andrew Kerr Haicheng Wu Manish Gupta Dustyn Blasig Pradeep Ramini Duane Merrill Aniket Shivam Piotr Majcher Paul Springer Markus Hohnerbach Jin Wang and Matt Nicely. 2022. CUTLASS. https:\/\/github.com\/NVIDIA\/cutlass  Andrew Kerr Haicheng Wu Manish Gupta Dustyn Blasig Pradeep Ramini Duane Merrill Aniket Shivam Piotr Majcher Paul Springer Markus Hohnerbach Jin Wang and Matt Nicely. 2022. CUTLASS. https:\/\/github.com\/NVIDIA\/cutlass"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00047"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00047"},{"key":"e_1_3_2_1_47_1","volume-title":"Logic synthesis using Synopsys\u00ae","author":"Kurup Pran","unstructured":"Pran Kurup and Taher Abbasi . 2012. Logic synthesis using Synopsys\u00ae . Springer Science & Business Media . Pran Kurup and Taher Abbasi. 2012. Logic synthesis using Synopsys\u00ae. Springer Science & Business Media."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2508148.2485964"},{"key":"e_1_3_2_1_49_1","volume-title":"Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis , Yinhan Liu , Naman Goyal , Marjan Ghazvininejad , Abdelrahman Mohamed , Omer Levy , Ves Stoyanov , and Luke Zettlemoyer . 2019 . Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019). Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov, and Luke Zettlemoyer. 2019. Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019)."},{"key":"e_1_3_2_1_50_1","volume-title":"Efficient Activation Quantization via Adaptive Rounding Border for Post-Training Quantization. arXiv preprint arXiv:2208.11945","author":"Li Zhengyi","year":"2022","unstructured":"Zhengyi Li , Cong Guo , Zhanda Zhu , Yangjie Zhou , Yuxian Qiu , Xiaotian Gao , Jingwen Leng , and Minyi Guo . 2022. Efficient Activation Quantization via Adaptive Rounding Border for Post-Training Quantization. arXiv preprint arXiv:2208.11945 ( 2022 ). Zhengyi Li, Cong Guo, Zhanda Zhu, Yangjie Zhou, Yuxian Qiu, Xiaotian Gao, Jingwen Leng, and Minyi Guo. 2022. Efficient Activation Quantization via Adaptive Rounding Border for Post-Training Quantization. arXiv preprint arXiv:2208.11945 (2022)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694358"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507752"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749475"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155650"},{"key":"e_1_3_2_1_55_1","unstructured":"ModelTC. 2022. repositories. https:\/\/huggingface.co\/ModelTC.  ModelTC. 2022. repositories. https:\/\/huggingface.co\/ModelTC."},{"key":"e_1_3_2_1_56_1","volume-title":"A tool to model large caches. HP laboratories 27","author":"Muralimanohar Naveen","year":"2009","unstructured":"Naveen Muralimanohar , Rajeev Balasubramonian , and Norman P Jouppi . 2009. CACTI 6.0 : A tool to model large caches. HP laboratories 27 ( 2009 ), 28. Naveen Muralimanohar, Rajeev Balasubramonian, and Norman P Jouppi. 2009. CACTI 6.0: A tool to model large caches. HP laboratories 27 (2009), 28."},{"key":"e_1_3_2_1_57_1","volume-title":"Yelysei Bondarenko, Mart van Baalen, and Tijmen Blankevoort.","author":"Nagel Markus","year":"2021","unstructured":"Markus Nagel , Marios Fournarakis , Rana Ali Amjad , Yelysei Bondarenko, Mart van Baalen, and Tijmen Blankevoort. 2021 . A white paper on neural network quantization. arXiv preprint arXiv:2106.08295 (2021). Markus Nagel, Marios Fournarakis, Rana Ali Amjad, Yelysei Bondarenko, Mart van Baalen, and Tijmen Blankevoort. 2021. A white paper on neural network quantization. arXiv preprint arXiv:2106.08295 (2021)."},{"key":"e_1_3_2_1_58_1","unstructured":"Nvidia. 2017. NVIDIA Tesla V100 GPU Architecture. In Technical report. NVIDIA.  Nvidia. 2017. NVIDIA Tesla V100 GPU Architecture. In Technical report. NVIDIA."},{"key":"e_1_3_2_1_59_1","unstructured":"Nvidia. 2018. NVIDIA Turing GPU Architecture. In Technical report. NVIDIA.  Nvidia. 2018. NVIDIA Turing GPU Architecture. In Technical report. NVIDIA."},{"key":"e_1_3_2_1_60_1","unstructured":"Nvidia. 2020. NVIDIA A100 tensor core architecture. In Technical report. NVIDIA.  Nvidia. 2020. NVIDIA A100 tensor core architecture. In Technical report. NVIDIA."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00063"},{"key":"e_1_3_2_1_62_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke , Sam Gross , Francisco Massa , Adam Lerer , James Bradbury , Gregory Chanan , Trevor Killeen , Zeming Lin , Natalia Gimelshein , Luca Antiga , 2019 . Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019), 8026--8037. Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019), 8026--8037."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2013.6657019"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"e_1_3_2_1_65_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Qiu Yuxian","year":"2019","unstructured":"Yuxian Qiu , Jingwen Leng , Cong Guo , Quan Chen , Chao Li , Minyi Guo , and Yuhao Zhu . 2019 . Adversarial Defense Through Network Profiling Based Path Extraction . In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Yuxian Qiu, Jingwen Leng, Cong Guo, Quan Chen, Chao Li, Minyi Guo, and Yuhao Zhu. 2019. Adversarial Defense Through Network Profiling Based Path Extraction. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_66_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019).  Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019)."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00016"},{"key":"e_1_3_2_1_68_1","volume-title":"Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics.","author":"Rajpurkar Pranav","year":"2016","unstructured":"Pranav Rajpurkar , Jian Zhang , Konstantin Lopyrev , and Percy Liang . 2016 . SQuAD: 100,000+ Questions for Machine Comprehension of Text . In Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics. Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, and Percy Liang. 2016. SQuAD: 100,000+ Questions for Machine Comprehension of Text. In Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics."},{"key":"e_1_3_2_1_69_1","volume-title":"DeepScaleTool: A Tool for the Accurate Estimation of Technology Scaling in the Deep-Submicron Era. In 2021 IEEE International Symposium on Circuits and Systems (ISCAS). IEEE, 1--5.","author":"Sarangi Satyabrata","year":"2021","unstructured":"Satyabrata Sarangi and Bevan Baas . 2021 . DeepScaleTool: A Tool for the Accurate Estimation of Technology Scaling in the Deep-Submicron Era. In 2021 IEEE International Symposium on Circuits and Systems (ISCAS). IEEE, 1--5. Satyabrata Sarangi and Bevan Baas. 2021. DeepScaleTool: A Tool for the Accurate Estimation of Technology Scaling in the Deep-Submicron Era. In 2021 IEEE International Symposium on Circuits and Systems (ISCAS). IEEE, 1--5."},{"key":"e_1_3_2_1_70_1","volume-title":"Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al.","author":"Scao Teven Le","year":"2022","unstructured":"Teven Le Scao , Angela Fan , Christopher Akiki , Ellie Pavlick , Suzana Ili\u0107 , Daniel Hesslow , Roman Castagn\u00e9 , Alexandra Sasha Luccioni , Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al. 2022 . BLOOM : A 176B-Parameter Open-Access Multilingual Language Model . arXiv preprint arXiv:2211.05100 (2022). Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra Sasha Luccioni, Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al. 2022. BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. arXiv preprint arXiv:2211.05100 (2022)."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783720"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00069"},{"key":"e_1_3_2_1_73_1","unstructured":"Hardik Sharma Jongse Park Naveen Suda Liangzhen Lai Benson Chau Vikas Chandra and Hadi Esmaeilzadeh. 2018. Bitfusion github repository. https:\/\/github.com\/hsharma35\/bitfusion.  Hardik Sharma Jongse Park Naveen Suda Liangzhen Lai Benson Chau Vikas Chandra and Hadi Esmaeilzadeh. 2018. Bitfusion github repository. https:\/\/github.com\/hsharma35\/bitfusion."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00086"},{"key":"e_1_3_2_1_76_1","volume-title":"2020 57th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--6.","author":"Tambe Thierry","year":"2020","unstructured":"Thierry Tambe , En-Yu Yang , Zishen Wan , Yuntian Deng , Vijay Janapa Reddi , Alexander Rush , David Brooks , and Gu-Yeon Wei . 2020 . Algorithm-hardware co-design of adaptive floating-point encodings for resilient deep learning inference . In 2020 57th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--6. Thierry Tambe, En-Yu Yang, Zishen Wan, Yuntian Deng, Vijay Janapa Reddi, Alexander Rush, David Brooks, and Gu-Yeon Wei. 2020. Algorithm-hardware co-design of adaptive floating-point encodings for resilient deep learning inference. In 2020 57th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--6."},{"key":"e_1_3_2_1_77_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , \u0141ukasz Kaiser , and Illia Polosukhin . 2017. Attention is all you need. Advances in neural information processing systems 30 ( 2017 ). Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_78_1","volume-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461","author":"Wang Alex","year":"2018","unstructured":"Alex Wang , Amanpreet Singh , Julian Michael , Felix Hill , Omer Levy , and Samuel R Bowman . 2018 . GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461 (2018). Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R Bowman. 2018. GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461 (2018)."},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00881"},{"key":"e_1_3_2_1_80_1","volume-title":"2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). IEEE, 1083--1095","author":"Wang Yang","year":"2021","unstructured":"Yang Wang , Chen Zhang , Zhiqiang Xie , Cong Guo , Yunxin Liu , and Jingwen Leng . 2021 . Dual-side sparse tensor core . In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). IEEE, 1083--1095 . Yang Wang, Chen Zhang, Zhiqiang Xie, Cong Guo, Yunxin Liu, and Jingwen Leng. 2021. Dual-side sparse tensor core. In 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). IEEE, 1083--1095."},{"key":"e_1_3_2_1_81_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 568--577","author":"Wang Ziwei","year":"2019","unstructured":"Ziwei Wang , Jiwen Lu , Chenxin Tao , Jie Zhou , and Qi Tian . 2019 . Learning channel-wise interactions for binary convolutional neural networks . In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 568--577 . Ziwei Wang, Jiwen Lu, Chenxin Tao, Jie Zhou, and Qi Tian. 2019. Learning channel-wise interactions for binary convolutional neural networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 568--577."},{"key":"e_1_3_2_1_82_1","unstructured":"Xiuying Wei Yunchen Zhang Xiangguo Zhang Ruihao Gong Shanghang Zhang Qi Zhang Fengwei Yu and Xianglong Liu. 2022. Outlier Suppression: Pushing the Limit of Low-bit Transformer Language Models. In Advances in Neural Information Processing Systems Alice H. Oh Alekh Agarwal Danielle Belgrave and Kyunghyun Cho (Eds.). https:\/\/openreview.net\/forum?id=yW5zeRSFdZ  Xiuying Wei Yunchen Zhang Xiangguo Zhang Ruihao Gong Shanghang Zhang Qi Zhang Fengwei Yu and Xianglong Liu. 2022. Outlier Suppression: Pushing the Limit of Low-bit Transformer Language Models. In Advances in Neural Information Processing Systems Alice H. Oh Alekh Agarwal Danielle Belgrave and Kyunghyun Cho (Eds.). https:\/\/openreview.net\/forum?id=yW5zeRSFdZ"},{"key":"e_1_3_2_1_83_1","volume-title":"The Free Encyclopedia. [Online]","author":"Wikipedia Wikipedia","unstructured":"Wikipedia contributors. 2022. 68-95-99.7 rule --- Wikipedia , The Free Encyclopedia. [Online] . Wikipedia contributors. 2022. 68-95-99.7 rule --- Wikipedia, The Free Encyclopedia. [Online]."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485974"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1109\/EMC2-NIPS53020.2019.00016"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_23"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"e_1_3_2_1_90_1","volume-title":"Xi Victoria Lin, et al","author":"Zhang Susan","year":"2022","unstructured":"Susan Zhang , Stephen Roller , Naman Goyal , Mikel Artetxe , Moya Chen , Shuohui Chen , Christopher Dewan , Mona Diab , Xian Li , Xi Victoria Lin, et al . 2022 . Opt : Open pre-trained transformer language models. arXiv preprint arXiv:2205.01068 (2022). Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen, Christopher Dewan, Mona Diab, Xian Li, Xi Victoria Lin, et al. 2022. Opt: Open pre-trained transformer language models. arXiv preprint arXiv:2205.01068 (2022)."},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.5555\/3488766.3488815"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378508"},{"key":"e_1_3_2_1_93_1","volume-title":"Dorefa-net: Training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160","author":"Zhou Shuchang","year":"2016","unstructured":"Shuchang Zhou , Yuxin Wu , Zekun Ni , Xinyu Zhou , He Wen , and Yuheng Zou . 2016 . Dorefa-net: Training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160 (2016). Shuchang Zhou, Yuxin Wu, Zekun Ni, Xinyu Zhou, He Wen, and Yuheng Zou. 2016. Dorefa-net: Training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160 (2016)."},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00011"},{"key":"e_1_3_2_1_95_1","volume-title":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","volume":"2","author":"Zhou Yangjie","year":"2023","unstructured":"Yangjie Zhou , Jingwen Leng , Yaoxu Song , Shuwen Lu , Mian Wang , Chao Li , Minyi Guo , Wenting Shen , Yong Li , Wei Lin , 2023 . uGrapher: High-Performance Graph Operator Computation via Unified Abstraction for Graph Neural Networks . In Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems , Volume 2 . 878--891. Yangjie Zhou, Jingwen Leng, Yaoxu Song, Shuwen Lu, Mian Wang, Chao Li, Minyi Guo, Wenting Shen, Yong Li, Wei Lin, et al. 2023. uGrapher: High-Performance Graph Operator Computation via Unified Abstraction for Graph Neural Networks. In Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2. 878--891."},{"key":"e_1_3_2_1_96_1","volume-title":"2021 IEEE International Symposium on Workload Characterization (IISWC). IEEE, 214--225","author":"Zhou Yangjie","year":"2021","unstructured":"Yangjie Zhou , Mengtian Yang , Cong Guo , Jingwen Leng , Yun Liang , Quan Chen , Minyi Guo , and Yuhao Zhu . 2021 . Characterizing and demystifying the implicit convolution algorithm on commercial matrix-multiplication accelerators . In 2021 IEEE International Symposium on Workload Characterization (IISWC). IEEE, 214--225 . Yangjie Zhou, Mengtian Yang, Cong Guo, Jingwen Leng, Yun Liang, Quan Chen, Minyi Guo, and Yuhao Zhu. 2021. Characterizing and demystifying the implicit convolution algorithm on commercial matrix-multiplication accelerators. In 2021 IEEE International Symposium on Workload Characterization (IISWC). IEEE, 214--225."},{"key":"e_1_3_2_1_97_1","volume-title":"ROLLER: Fast and Efficient Tensor Compilation for Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Zhu Hongyu","year":"2022","unstructured":"Hongyu Zhu , Ruofan Wu , Yijia Diao , Shanbin Ke , Haoyu Li , Chen Zhang , Jilong Xue , Lingxiao Ma , Yuqing Xia , Wei Cui , Fan Yang , Mao Yang , Lidong Zhou , Asaf Cidon , and Gennady Pekhimenko . 2022 . ROLLER: Fast and Efficient Tensor Compilation for Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22) . 233--248. Hongyu Zhu, Ruofan Wu, Yijia Diao, Shanbin Ke, Haoyu Li, Chen Zhang, Jilong Xue, Lingxiao Ma, Yuqing Xia, Wei Cui, Fan Yang, Mao Yang, Lidong Zhou, Asaf Cidon, and Gennady Pekhimenko. 2022. ROLLER: Fast and Efficient Tensor Compilation for Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). 233--248."},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358269"},{"key":"e_1_3_2_1_99_1","volume-title":"Effective training of convolutional neural networks with low-bitwidth weights and activations","author":"Zhuang Bohan","year":"2021","unstructured":"Bohan Zhuang , Mingkui Tan , Jing Liu , Lingqiao Liu , Ian Reid , and Chunhua Shen . 2021. Effective training of convolutional neural networks with low-bitwidth weights and activations . IEEE Transactions on Pattern Analysis and Machine Intelligence ( 2021 ). Bohan Zhuang, Mingkui Tan, Jing Liu, Lingqiao Liu, Ian Reid, and Chunhua Shen. 2021. Effective training of convolutional neural networks with low-bitwidth weights and activations. IEEE Transactions on Pattern Analysis and Machine Intelligence (2021)."}],"event":{"name":"ISCA '23: 50th Annual International Symposium on Computer Architecture","location":"Orlando FL USA","acronym":"ISCA '23","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE"]},"container-title":["Proceedings of the 50th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589038","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:38Z","timestamp":1750178798000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3579371.3589038"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,17]]},"references-count":99,"alternative-id":["10.1145\/3579371.3589038","10.1145\/3579371"],"URL":"https:\/\/doi.org\/10.1145\/3579371.3589038","relation":{},"subject":[],"published":{"date-parts":[[2023,6,17]]},"assertion":[{"value":"2023-06-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}