{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T16:17:34Z","timestamp":1758125854966,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,23]],"date-time":"2024-06-23T00:00:00Z","timestamp":1719100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Research Foundation of Korea (NRF)","award":["NRF-2022R1C1C1008074"],"award-info":[{"award-number":["NRF-2022R1C1C1008074"]}]},{"name":"Institute of Information and Communications Technology Planning and Evaluation (IITP)","award":["2021-0-00105","2021-0-00106","RS-2022-00155911"],"award-info":[{"award-number":["2021-0-00105","2021-0-00106","RS-2022-00155911"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,23]]},"DOI":"10.1145\/3649329.3655907","type":"proceedings-article","created":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T19:27:22Z","timestamp":1731007642000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["MERSIT: A Hardware-Efficient 8-bit Data Format with Enhanced Post-Training Quantization DNN Accuracy"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3470-835X","authenticated-orcid":false,"given":"Nguyen-Dong","family":"Ho","sequence":"first","affiliation":[{"name":"Kyunghee University, Yongin-si, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3534-8452","authenticated-orcid":false,"given":"Gyujun","family":"Jeong","sequence":"additional","affiliation":[{"name":"Kyunghee University, Yongin-si, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5157-5126","authenticated-orcid":false,"given":"Cheol-Min","family":"Kang","sequence":"additional","affiliation":[{"name":"Kyunghee University, Yongin-si, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3125-9707","authenticated-orcid":false,"given":"Seungkyu","family":"Choi","sequence":"additional","affiliation":[{"name":"Kyung Hee University, Yongin-si, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8871-8695","authenticated-orcid":false,"given":"Ik-Joon","family":"Chang","sequence":"additional","affiliation":[{"name":"Kyunghee University, Yongin-si, Gyeonggi-do, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Michael Andersch et al. 2022. NVIDIA Hopper Architecture In-Depth. Technical Report. NVIDIA Corporation."},{"volume-title":"Deep Positron: A Deep Neural Network Using the Posit Number System. In DATE '19","author":"Zachariah","key":"e_1_3_2_1_2_1","unstructured":"Zachariah Carmichael et al. 2019. Deep Positron: A Deep Neural Network Using the Posit Number System. In DATE '19."},{"key":"e_1_3_2_1_3_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs.CL]","author":"Jacob Devlin","year":"2019","unstructured":"Jacob Devlin et al. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs.CL]"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Amir Gholami et al. 2021. 
A Survey of Quantization Methods for Efficient Neural Network Inference. arXiv:2103.13630 [cs.CV]","DOI":"10.1201\/9781003162810-13"},{"volume-title":"ANT: Exploiting Adaptive Numerical Data Type for Low-bit Deep Neural Network Quantization. In MICRO '22","author":"Cong","key":"e_1_3_2_1_5_1","unstructured":"Cong Guo et al. 2022. ANT: Exploiting Adaptive Numerical Data Type for Low-bit Deep Neural Network Quantization. In MICRO '22."},{"key":"e_1_3_2_1_6_1","unstructured":"Gustafson and Yonemoto. 2017. Beating Floating Point at Its Own Game: Posit Arithmetic. Supercomput. Front. Innov.: Int. J. (2017)."},{"volume-title":"Computer Arithmetic and Validity: Theory, Implementation, and Applications. De Gruyter","author":"Kulisch Ulrich","key":"e_1_3_2_1_7_1","unstructured":"Ulrich Kulisch. 2012. Computer Arithmetic and Validity: Theory, Implementation, and Applications. De Gruyter, Berlin, Boston."},{"key":"e_1_3_2_1_8_1","volume-title":"Langroudi et al","author":"Hamed","year":"2020","unstructured":"Hamed F. Langroudi et al. 2020. Adaptive Posit: Parameter aware numerical format for deep learning inference on the edge. In CVPR '20."},{"key":"e_1_3_2_1_9_1","volume-title":"AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. arXiv:2306.00978 [cs.CL]","author":"Ji Lin","year":"2023","unstructured":"Ji Lin et al. 2023. AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. arXiv:2306.00978 [cs.CL]"},{"volume-title":"PD-Quant: Post-Training Quantization Based on Prediction Difference Metric. In CVPR '23","author":"Jiawei","key":"e_1_3_2_1_10_1","unstructured":"Jiawei Liu et al. 2023. PD-Quant: Post-Training Quantization Based on Prediction Difference Metric. In CVPR '23."},{"volume-title":"Algorithm-Hardware Co-Design of Adaptive Floating-Point Encodings for Resilient Deep Learning Inference. In DAC '20","author":"Thierry","key":"e_1_3_2_1_11_1","unstructured":"Thierry Tambe et al. 2020. Algorithm-Hardware Co-Design of Adaptive Floating-Point Encodings for Resilient Deep Learning Inference. In DAC '20."},{"key":"e_1_3_2_1_12_1","volume-title":"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. In ICML '19","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. In ICML '19."},{"key":"e_1_3_2_1_13_1","unstructured":"Mart van Baalen et al. 2023. FP8 versus INT8 for efficient deep learning inference. arXiv:2303.17951 [cs.LG]"},{"volume-title":"QDrop: Randomly Dropping Quantization for Extremely Low-bit Post-Training Quantization. In ICLR '22","author":"Xiuying","key":"e_1_3_2_1_14_1","unstructured":"Xiuying Wei et al. 2022. QDrop: Randomly Dropping Quantization for Extremely Low-bit Post-Training Quantization. In ICLR '22."},{"volume-title":"Be Like Water: Adaptive Floating Point for Machine Learning. In ICML '22","author":"Thomas","key":"e_1_3_2_1_15_1","unstructured":"Thomas Yeh et al. 2022. Be Like Water: Adaptive Floating Point for Machine Learning. 
In ICML '22."}],"event":{"name":"DAC '24: 61st ACM\/IEEE Design Automation Conference","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE-CEDA","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"San Francisco CA USA","acronym":"DAC '24"},"container-title":["Proceedings of the 61st ACM\/IEEE Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3655907","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649329.3655907","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:48Z","timestamp":1750295868000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3655907"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,23]]},"references-count":15,"alternative-id":["10.1145\/3649329.3655907","10.1145\/3649329"],"URL":"https:\/\/doi.org\/10.1145\/3649329.3655907","relation":{},"subject":[],"published":{"date-parts":[[2024,6,23]]},"assertion":[{"value":"2024-11-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
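For context, the record above is what the public Crossref REST API returns for this DOI. Below is a minimal sketch of fetching and summarizing such a record, assuming network access; the endpoint pattern `https://api.crossref.org/works/{DOI}` and the `{"status": "ok", "message": {...}}` envelope match the record shown, while the helper name `fetch_work` and the contact address passed through Crossref's `mailto` polite-pool parameter are illustrative placeholders.

```python
# Minimal sketch: fetch and summarize a Crossref work record.
# Assumptions (not part of the record above): the helper name fetch_work and
# the placeholder address passed via Crossref's "mailto" polite-pool parameter.
import json
import urllib.parse
import urllib.request

CROSSREF_WORKS = "https://api.crossref.org/works/"


def fetch_work(doi: str, mailto: str = "you@example.org") -> dict:
    """Return the 'message' payload of a Crossref work record."""
    url = f"{CROSSREF_WORKS}{urllib.parse.quote(doi)}?mailto={mailto}"
    with urllib.request.urlopen(url) as resp:
        envelope = json.load(resp)
    # The envelope should look like the record above: status "ok", type "work".
    if envelope.get("status") != "ok" or envelope.get("message-type") != "work":
        raise ValueError(f"unexpected Crossref response for {doi}")
    return envelope["message"]


if __name__ == "__main__":
    work = fetch_work("10.1145/3649329.3655907")
    print(work["title"][0])                       # paper title
    print(work["event"]["acronym"], "-", work["container-title"][0])
    for ref in work.get("reference", []):         # the 15 reference entries
        print("  *", ref.get("unstructured", ref.get("key", "")))
```

Run against the DOI above, this should print the MERSIT title, the DAC '24 venue, and the fifteen reference strings; the `mailto` parameter simply identifies the client to Crossref's polite pool and is recommended rather than required.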