{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:44:25Z","timestamp":1766220265232,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754660","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"428-438","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SmartBlock: Adaptive Block Floating Point Quantization for Efficient DNN Acceleration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0875-196X","authenticated-orcid":false,"given":"Xin","family":"Ju","sequence":"first","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1462-0397","authenticated-orcid":false,"given":"Jingkui","family":"Yang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5875-3297","authenticated-orcid":false,"given":"Mei","family":"Wen","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9830-2663","authenticated-orcid":false,"given":"Jun","family":"He","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3854-5336","authenticated-orcid":false,"given":"Jing","family":"Feng","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2254-4912","authenticated-orcid":false,"given":"Minjin","family":"Tang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1552-8396","authenticated-orcid":false,"given":"Zhaoyun","family":"Chen","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5786-3171","authenticated-orcid":false,"given":"Yang","family":"Shi","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"2008. IEEE Standard for Floating-Point Arithmetic. IEEE Std 754-2008 (2008) 1\u201370."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","unstructured":"Yu-Hsin Chen Tien-Ju Yang Joel\u00a0S. Emer et\u00a0al. 2019. Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices. IEEE J. Emerg. Sel. Topics Circuits Syst. 9 2 292\u2013308. 10.1109\/JETCAS.2019.2910232","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Jack Choquette Wishwesh Gandhi Olivier Giroux et\u00a0al. 2021. NVIDIA A100 Tensor Core GPU: Performance and Innovation. IEEE Micro 41 2 (2021) 29\u201335.","DOI":"10.1109\/MM.2021.3061394"},{"key":"e_1_3_3_1_5_2","volume-title":"Proceedings of the Fourth Conference on Machine Learning and Systems, MLSys","author":"Dai Steve","year":"2021","unstructured":"Steve Dai, Rangharajan Venkatesan, Mark Ren, et\u00a0al. 2021. VS-Quant: Per-vector Scaled Quantization for Accurate Low-Precision Neural Network Inference. In Proceedings of the Fourth Conference on Machine Learning and Systems, MLSys. mlsys.org."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614246"},{"key":"e_1_3_3_1_8_2","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS","author":"Dettmers Tim","year":"2022","unstructured":"Tim Dettmers, Mike Lewis, Younes Belkada, et\u00a0al. 2022. GPT3.int8(): 8-bit Matrix Multiplication for Transformers at Scale. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS, Sanmi Koyejo, S.\u00a0Mohamed, A.\u00a0Agarwal, Danielle Belgrave, K.\u00a0Cho, and A.\u00a0Oh (Eds.)."},{"key":"e_1_3_3_1_9_2","first-page":"4171","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, et\u00a0al. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT, Jill Burstein, Christy Doran, and Thamar Solorio (Eds.). Association for Computational Linguistics, 4171\u20134186."},{"key":"e_1_3_3_1_10_2","first-page":"332","volume-title":"IEEE International Solid- State Circuits Conference, ISSCC","author":"Du Cheng-Yan","year":"2023","unstructured":"Cheng-Yan Du, Chieh-Fu Tsai, Wen-Ching Chen, et\u00a0al. 2023. A 28nm 11.2TOPS\/W Hardware-Utilization-Aware Neural-Network Accelerator with Dynamic Dataflow. In IEEE International Solid- State Circuits Conference, ISSCC. IEEE, 332\u2013333."},{"key":"e_1_3_3_1_11_2","first-page":"1","volume-title":"5th IEEE International Conference on Artificial Intelligence Circuits and Systems, AICAS","author":"Filippas Dionysios","year":"2023","unstructured":"Dionysios Filippas, Christodoulos Peltekis, Giorgos Dimitrakopoulos, et\u00a0al. 2023. Reduced-Precision Floating-Point Arithmetic in Systolic Arrays with Skewed Pipelines. In 5th IEEE International Conference on Artificial Intelligence Circuits and Systems, AICAS. IEEE, 1\u20135."},{"key":"e_1_3_3_1_12_2","first-page":"3:1\u20133:15","volume-title":"Proceedings of the 50th Annual International Symposium on Computer Architecture, ISCA","author":"Guo Cong","year":"2023","unstructured":"Cong Guo, Jiaming Tang, Weiming Hu, et\u00a0al. 2023. OliVe: Accelerating Large Language Models via Hardware-friendly Outlier-Victim Pair Quantization. In Proceedings of the 50th Annual International Symposium on Computer Architecture, ISCA, Yan Solihin and Mark\u00a0A. Heinrich (Eds.). ACM, 3:1\u20133:15."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00095"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_15_2","first-page":"674","volume-title":"45th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA","author":"Hegde Kartik","year":"2018","unstructured":"Kartik Hegde, Jiyong Yu, Rohit Agrawal, et\u00a0al. 2018. UCNN: Exploiting Computational Reuse in Deep Neural Networks via Weight Repetition. In 45th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA, Murali Annavaram, Timothy\u00a0Mark Pinkston, and Babak Falsafi (Eds.). IEEE Computer Society, 674\u2013687."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Pouya Houshmand Giuseppe\u00a0Maria Sarda Vikram Jain et\u00a0al. 2023. DIANA: An End-to-End Hybrid DIgital and ANAlog Neural Network SoC for the Edge. IEEE J. Solid State Circuits 58 1 (2023) 203\u2013215.","DOI":"10.1109\/JSSC.2022.3214064"},{"key":"e_1_3_3_1_17_2","first-page":"2704","volume-title":"2018 IEEE Conference on Computer Vision and Pattern Recognition, CVPR","author":"Jacob Benoit","year":"2018","unstructured":"Benoit Jacob, Skirmantas Kligys, Bo Chen, et\u00a0al. 2018. Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference. In 2018 IEEE Conference on Computer Vision and Pattern Recognition, CVPR. Computer Vision Foundation \/ IEEE Computer Society, 2704\u20132713."},{"key":"e_1_3_3_1_18_2","first-page":"82:1\u201382:14","volume-title":"Proceedings of the 50th Annual International Symposium on Computer Architecture, ISCA","author":"Jouppi Norman\u00a0P.","year":"2023","unstructured":"Norman\u00a0P. Jouppi, George Kurian, Sheng Li, et\u00a0al. 2023. TPU v4: An Optically Reconfigurable Supercomputer for Machine Learning with Hardware Support for Embeddings. In Proceedings of the 50th Annual International Symposium on Computer Architecture, ISCA, Yan Solihin and Mark\u00a0A. Heinrich (Eds.). ACM, 82:1\u201382:14."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_3_1_20_2","first-page":"1","volume-title":"Design, Automation & Test in Europe Conference, DATE","author":"Ju Xin","year":"2025","unstructured":"Xin Ju, Jun He, Mei Wen, et\u00a0al. 2025. WinAcc: Window-based Acceleration of Neural Networks Using Block Floating Point. In Design, Automation & Test in Europe Conference, DATE. IEEE, 1\u20137."},{"key":"e_1_3_3_1_21_2","unstructured":"Dhiraj\u00a0D. Kalamkar Dheevatsa Mudigere Naveen Mellempudi et\u00a0al. 2019. A Study of BFLOAT16 for Deep Learning Training. CoRR abs\/1905.12322 (2019)."},{"key":"e_1_3_3_1_22_2","first-page":"1","volume-title":"2023 IFIP\/IEEE 31st International Conference on Very Large Scale Integration (VLSI-SoC)","author":"Karimzadeh Foroozan","year":"2023","unstructured":"Foroozan Karimzadeh, Mohsen Imani, Bahar Asgari, et\u00a0al. 2023. Memory-Based Computing for Energy-Efficient AI: Grand Challenges. In 2023 IFIP\/IEEE 31st International Conference on Very Large Scale Integration (VLSI-SoC). 1\u20138."},{"key":"e_1_3_3_1_23_2","unstructured":"Paresh Kharya. 2020. TensorFloat-32 in the A100 GPU Accelerates AI Training HPC up to 20x. NVIDIA Corporation Tech. Rep (2020)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00069"},{"key":"e_1_3_3_1_25_2","volume-title":"Proceedings of the Seventh Annual Conference on Machine Learning and Systems, MLSys","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, et\u00a0al. 2024. AWQ: Activation-aware Weight Quantization for On-Device LLM Compression and Acceleration. In Proceedings of the Seventh Annual Conference on Machine Learning and Systems, MLSys. mlsys.org."},{"key":"e_1_3_3_1_26_2","first-page":"1029","volume-title":"IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Liu Fangxin","year":"2024","unstructured":"Fangxin Liu, Ning Yang, Haomin Li, et\u00a0al. 2024. SPARK: Scalable and Precision-Aware Acceleration of Neural Networks via Efficient Encoding. In IEEE International Symposium on High-Performance Computer Architecture, HPCA. IEEE, 1029\u20131042."},{"key":"e_1_3_3_1_27_2","first-page":"140:1\u2013140:6","volume-title":"Proceedings of the 61st ACM\/IEEE Design Automation Conference, DAC","author":"Liu Lian","year":"2024","unstructured":"Lian Liu, Zhaohui Xu, Yintao He, et\u00a0al. 2024. Drift: Leveraging Distribution-based Dynamic Precision Quantization for Efficient Deep Neural Network Acceleration. In Proceedings of the 61st ACM\/IEEE Design Automation Conference, DAC. ACM, 140:1\u2013140:6."},{"key":"e_1_3_3_1_28_2","volume-title":"The Eleventh International Conference on Learning Representations, ICLR","author":"Lo Yun-Chen","year":"2023","unstructured":"Yun-Chen Lo, Tse-Kuang Lee, and Ren-Shuo Liu. 2023. Block and Subword-Scaling Floating-Point (BSFP) : An Efficient Non-Uniform Quantization For Low Precision Inference. In The Eleventh International Conference on Learning Representations, ICLR. OpenReview.net."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614249"},{"key":"e_1_3_3_1_30_2","first-page":"1","volume-title":"60th ACM\/IEEE Design Automation Conference, DAC","author":"Lo Yun-Chen","year":"2023","unstructured":"Yun-Chen Lo and Ren-Shuo Liu. 2023. Morphable CIM: Improving Operation Intensity and Depthwise Capability for SRAM-CIM Architecture. In 60th ACM\/IEEE Design Automation Conference, DAC. IEEE, 1\u20136."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Sparsh Mittal. 2020. A survey on modeling and improving reliability of DNN algorithms and accelerators. Journal of Systems Architecture 104 (2020) 101689.","DOI":"10.1016\/j.sysarc.2019.101689"},{"key":"e_1_3_3_1_32_2","unstructured":"Naveen Muralimanohar Rajeev Balasubramonian and Norman\u00a0P. Jouppi. 2009. CACTI 6.0: A Tool to Model Large Caches."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00063"},{"key":"e_1_3_3_1_34_2","first-page":"8024","volume-title":"Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, et\u00a0al. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS, Hanna\u00a0M. Wallach, Hugo Larochelle, Alina Beygelzimer, Florence d\u2019Alch\u00e9-Buc, Emily\u00a0B. Fox, and Roman Garnett (Eds.). 8024\u20138035."},{"key":"e_1_3_3_1_35_2","first-page":"1","volume-title":"Design, Automation & Test in Europe Conference & Exhibition, DATE","author":"Peltekis Christodoulos","year":"2023","unstructured":"Christodoulos Peltekis, Dionysios Filippas, Giorgos Dimitrakopoulos, et\u00a0al. 2023. ArrayFlex: A Systolic Array Architecture with Configurable Transparent Pipelining. In Design, Automation & Test in Europe Conference & Exhibition, DATE. IEEE, 1\u20136."},{"key":"e_1_3_3_1_36_2","first-page":"436 \u2013 447","volume-title":"Advanced Signal Processing Algorithms, Architectures, and Implementations XII","author":"Pillmeier Matthew\u00a0R.","year":"2002","unstructured":"Matthew\u00a0R. Pillmeier, Michael\u00a0J. Schulte, and Eugene George\u00a0Walters III. 2002. Design alternatives for barrel shifters. In Advanced Signal Processing Algorithms, Architectures, and Implementations XII, Franklin\u00a0T. Luk (Ed.), Vol.\u00a04791. International Society for Optics and Photonics, SPIE, 436 \u2013 447."},{"key":"e_1_3_3_1_37_2","first-page":"1353","volume-title":"57th IEEE\/ACM International Symposium on Microarchitecture, MICRO","author":"Prabhakar Raghu","year":"2024","unstructured":"Raghu Prabhakar, Ram Sivaramakrishnan, Darshan Gandhi, et\u00a0al. 2024. SambaNova SN40L: Scaling the AI Memory Wall with Dataflow and Composition of Experts. In 57th IEEE\/ACM International Symposium on Microarchitecture, MICRO. IEEE, 1353\u20131366."},{"key":"e_1_3_3_1_38_2","first-page":"784","volume-title":"Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, ACL","author":"Rajpurkar Pranav","year":"2018","unstructured":"Pranav Rajpurkar, Robin Jia, and Percy Liang. 2018. Know What You Don\u2019t Know: Unanswerable Questions for SQuAD. In Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, ACL, Iryna Gurevych and Yusuke Miyao (Eds.). Association for Computational Linguistics, 784\u2013789."},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"crossref","first-page":"2383","DOI":"10.18653\/v1\/D16-1264","volume-title":"Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, EMNLP","author":"Rajpurkar Pranav","year":"2016","unstructured":"Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, et\u00a0al. 2016. SQuAD: 100, 000+ Questions for Machine Comprehension of Text. In Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, EMNLP, Jian Su, Xavier Carreras, and Kevin Duh (Eds.). The Association for Computational Linguistics, 2383\u20132392."},{"key":"e_1_3_3_1_40_2","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020","author":"Rouhani Bita\u00a0Darvish","year":"2020","unstructured":"Bita\u00a0Darvish Rouhani, Daniel Lo, Ritchie Zhao, et\u00a0al. 2020. Pushing the Limits of Narrow Precision Inferencing at Cloud Scale with Microsoft Floating Point. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020."},{"key":"e_1_3_3_1_41_2","first-page":"83:1\u201383:13","volume-title":"Proceedings of the 50th Annual International Symposium on Computer Architecture, ISCA","author":"Rouhani Bita\u00a0Darvish","year":"2023","unstructured":"Bita\u00a0Darvish Rouhani, Ritchie Zhao, Venmugil Elango, et\u00a0al. 2023. With Shared Microexponents, A Little Shifting Goes a Long Way. In Proceedings of the 50th Annual International Symposium on Computer Architecture, ISCA. ACM, 83:1\u201383:13."},{"key":"e_1_3_3_1_42_2","first-page":"58","volume-title":"IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS","author":"Samajdar Ananda","year":"2020","unstructured":"Ananda Samajdar, Jan\u00a0Moritz Joseph, Yuhao Zhu, et\u00a0al. 2020. A Systematic Methodology for Characterizing Scalability of DNN Accelerators using SCALE-Sim. In IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS. IEEE, 58\u201368."},{"key":"e_1_3_3_1_43_2","first-page":"17:1\u201317:12","volume-title":"49th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO","author":"Sharma Hardik","year":"2016","unstructured":"Hardik Sharma, Jongse Park, Divya Mahajan, et\u00a0al. 2016. From high-level deep neural models to FPGAs. In 49th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO. IEEE Computer Society, 17:1\u201317:12."},{"key":"e_1_3_3_1_44_2","first-page":"764","volume-title":"45th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA","author":"Sharma Hardik","year":"2018","unstructured":"Hardik Sharma, Jongse Park, Naveen Suda, et\u00a0al. 2018. Bit Fusion: Bit-Level Dynamically Composable Architecture for Accelerating Deep Neural Network. In 45th ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA, Murali Annavaram, Timothy\u00a0Mark Pinkston, and Babak Falsafi (Eds.). IEEE Computer Society, 764\u2013775."},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"crossref","first-page":"1010","DOI":"10.1109\/ISCA45697.2020.00086","volume-title":"2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)","author":"Song Zhuoran","year":"2020","unstructured":"Zhuoran Song, Bangqi Fu, Feiyang Wu, et\u00a0al. 2020. DRQ: Dynamic Region-based Quantization for Deep Neural Network Acceleration. In 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). 1010\u20131021."},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"crossref","first-page":"353","DOI":"10.18653\/v1\/W18-5446","volume-title":"Proceedings of the Workshop: Analyzing and Interpreting Neural Networks for NLP, BlackboxNLP@EMNLP","author":"Wang Alex","year":"2018","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, et\u00a0al. 2018. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In Proceedings of the Workshop: Analyzing and Interpreting Neural Networks for NLP, BlackboxNLP@EMNLP, Tal Linzen, Grzegorz Chrupala, and Afra Alishahi (Eds.). Association for Computational Linguistics, 353\u2013355."},{"key":"e_1_3_3_1_47_2","first-page":"44","volume-title":"Information Processing, Proceedings of the 1st International Conference on Information Processing, UNESCO","author":"Wilkinson James\u00a0Hardy","year":"1959","unstructured":"James\u00a0Hardy Wilkinson. 1959. Rounding errors in algebraic processes. In Information Processing, Proceedings of the 1st International Conference on Information Processing, UNESCO. UNESCO (Paris), 44\u201353."},{"key":"e_1_3_3_1_48_2","series-title":"Proceedings of Machine Learning Research","first-page":"38087","volume-title":"International Conference on Machine Learning, ICML","volume":"202","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Ji Lin, Micka\u00ebl Seznec, et\u00a0al. 2023. SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models. In International Conference on Machine Learning, ICML(Proceedings of Machine Learning Research, Vol.\u00a0202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 38087\u201338099."},{"key":"e_1_3_3_1_49_2","first-page":"811","volume-title":"53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO","author":"Zadeh Ali\u00a0Hadi","year":"2020","unstructured":"Ali\u00a0Hadi Zadeh, Isak Edo, Omar\u00a0Mohamed Awad, et\u00a0al. 2020. GOBO: Quantizing Attention-Based NLP Models for Low Latency and Energy Efficient Inference. In 53rd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO. IEEE, 811\u2013824."},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00067"},{"key":"e_1_3_3_1_51_2","first-page":"861","volume-title":"IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Zhang Sai\u00a0Qian","year":"2024","unstructured":"Sai\u00a0Qian Zhang, Thierry Tambe, Nestor Cuevas, et\u00a0al. 2024. CAMEL: Co-Designing AI Models and eDRAMs for Efficient On-Device Learning. In IEEE International Symposium on High-Performance Computer Architecture, HPCA. IEEE, 861\u2013875."},{"key":"e_1_3_3_1_52_2","volume-title":"Proceedings of the Seventh Annual Conference on Machine Learning and Systems, MLSys","author":"Zhao Yilong","year":"2024","unstructured":"Yilong Zhao, Chien-Yu Lin, Kan Zhu, et\u00a0al. 2024. Atom: Low-Bit Quantization for Efficient and Accurate LLM Serving. In Proceedings of the Seventh Annual Conference on Machine Learning and Systems, MLSys, Phillip\u00a0B. Gibbons, Gennady Pekhimenko, and Christopher\u00a0De Sa (Eds.). mlsys.org."},{"key":"e_1_3_3_1_53_2","first-page":"124","volume-title":"IEEE International Symposium on High-Performance Computer Architecture, HPCA","author":"Zhu Zeyu","year":"2024","unstructured":"Zeyu Zhu, Fanrong Li, Gang Li, et\u00a0al. 2024. MEGA: A Memory-Efficient GNN Accelerator Exploiting Degree-Aware Mixed-Precision Quantization. In IEEE International Symposium on High-Performance Computer Architecture, HPCA. IEEE, 124\u2013138."}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754660","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:39:32Z","timestamp":1766219972000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754660"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":52,"alternative-id":["10.1145\/3754598.3754660","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754660","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}