{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:40:08Z","timestamp":1750297208751,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"Samsung Advanced Institute of Technology, Samsung Electronics Co., Ltd."},{"name":"IITP Grant through Artificial Intelligence Graduate School Program","award":["UNIST","2020-0-01336"],"award-info":[{"award-number":["UNIST","2020-0-01336"]}]},{"name":"NRF Grants through National R&D Program","award":["RS-2024-00360300","RS-2023-00258527"],"award-info":[{"award-number":["RS-2024-00360300","RS-2023-00258527"]}]},{"name":"Brain Korea 21 Four"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3676536.3676790","type":"proceedings-article","created":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T12:53:56Z","timestamp":1744203236000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["FlexInt: A New Number Format for Robust Sub-8-Bit Neural Network Inference"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2498-2467","authenticated-orcid":false,"given":"Minuk","family":"Hong","sequence":"first","affiliation":[{"name":"Ulsan National Institute of Science and Technology (UNIST), Ulsan, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7214-0506","authenticated-orcid":false,"given":"Hyeonuk","family":"Sim","sequence":"additional","affiliation":[{"name":"Samsung Advanced Institute of Technology (SAIT), Suwon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3092-6501","authenticated-orcid":false,"given":"Sugil","family":"Lee","sequence":"additional","affiliation":[{"name":"Dept. of Electrical Engineering, Ulsan National Institute of Science and Technology (UNIST), Ulsan, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1523-2974","authenticated-orcid":false,"given":"Jongeun","family":"Lee","sequence":"additional","affiliation":[{"name":"Dept. of Electrical Engineering, Ulsan National Institute of Science and Technology (UNIST), Ulsan, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2025,4,9]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Memory-efficient fine-tuning of compressed large language models via sub-4-bit integer quantization,\" Advances in Neural Information Processing Systems","author":"Kim J.","year":"2024","unstructured":"J. Kim, J. H. Lee, S. Kim, J. Park, K. M. Yoo, S. J. Kwon, and D. Lee, \"Memory-efficient fine-tuning of compressed large language models via sub-4-bit integer quantization,\" Advances in Neural Information Processing Systems, vol. 36, 2024."},{"key":"e_1_3_2_1_2_1","first-page":"2704","article-title":"Quantization and training of neural networks for efficient integer-arithmetic-only inference","author":"Jacob B.","year":"2018","unstructured":"B. Jacob, S. Kligys, B. Chen, M. Zhu, M. Tang, A. Howard, H. Adam, and D. Kalenichenko, \"Quantization and training of neural networks for efficient integer-arithmetic-only inference,\" in Proceedings of the IEEE conference on computer vision and pattern recognition, 2018, pp. 2704--2713.","journal-title":"Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"e_1_3_2_1_3_1","volume-title":"Integer quantization for deep learning inference: Principles and empirical evaluation,\" arXiv preprint arXiv:2004.09602","author":"Wu H.","year":"2020","unstructured":"H. Wu, P. Judd, X. Zhang, M. Isaev, and P. Micikevicius, \"Integer quantization for deep learning inference: Principles and empirical evaluation,\" arXiv preprint arXiv:2004.09602, 2020."},{"key":"e_1_3_2_1_4_1","volume-title":"Fp8 versus int8 for efficient deep learning inference,\" arXiv preprint arXiv:2303.17951","author":"van Baalen M.","year":"2023","unstructured":"M. van Baalen, A. Kuzmin, S. S. Nair, Y. Ren, E. Mahurin, C. Patel, S. Subramanian, S. Lee, M. Nagel, J. Soriaga, and T. Blankevoort, \"Fp8 versus int8 for efficient deep learning inference,\" arXiv preprint arXiv:2303.17951, 2023."},{"key":"e_1_3_2_1_5_1","first-page":"3091","article-title":"Pareto-optimal quantized resnet is mostly 4-bit","author":"Abdolrashidi A.","year":"2021","unstructured":"A. Abdolrashidi, L. Wang, S. Agrawal, J. Malmaud, O. Rybakov, C. Leichner, and L. Lew, \"Pareto-optimal quantized resnet is mostly 4-bit,\" in Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 3091--3099.","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_6_1","DOI":"10.1609\/aaai.v37i9.26354"},{"key":"e_1_3_2_1_7_1","first-page":"4852","article-title":"Differentiable soft quantization: Bridging full-precision and low-bit neural networks","author":"Gong R.","year":"2019","unstructured":"R. Gong, X. Liu, S. Jiang, T. Li, P. Hu, J. Lin, F. Yu, and J. Yan, \"Differentiable soft quantization: Bridging full-precision and low-bit neural networks,\" in Proceedings of the IEEE\/CVF international conference on computer vision, 2019, pp. 4852--4861.","journal-title":"Proceedings of the IEEE\/CVF international conference on computer vision"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1109\/MM.2021.3061394"},{"key":"e_1_3_2_1_9_1","volume-title":"H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc","author":"Sun X.","year":"2019","unstructured":"X. Sun, J. Choi, C.-Y. Chen, N. Wang, S. Venkataramani, V. V. Srinivasan, X. Cui, W. Zhang, and K. Gopalakrishnan, \"Hybrid 8-bit floating point (hfp8) training and inference for deep neural networks,\" in Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett, Eds., vol. 32. Curran Associates, Inc., 2019. [Online]. Available: https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/file\/65fc9fb4897a89789352e211ca2d398f-Paper.pdf"},{"key":"e_1_3_2_1_10_1","volume-title":"Fp8 formats for deep learning,\" arXiv preprint arXiv:2209.05433","author":"Micikevicius P.","year":"2022","unstructured":"P. Micikevicius, D. Stosic, N. Burgess, M. Cornea, P. Dubey, R. Grisenthwaite, S. Ha, A. Heinecke, P. Judd, J. Kamalu, N. Mellempudi, S. Oberman, M. Shoeybi, M. Siu, and H. Wu, \"Fp8 formats for deep learning,\" arXiv preprint arXiv:2209.05433, 2022."},{"volume-title":"Beating floating point at its own game: Posit arithmetic,\" Supercomputing frontiers and innovations","author":"Gustafson J. L.","unstructured":"J. L. Gustafson and I. T. Yonemoto, \"Beating floating point at its own game: Posit arithmetic,\" Supercomputing frontiers and innovations, vol. 4, no. 2, pp. 71--86, 2017.","key":"e_1_3_2_1_11_1"},{"key":"e_1_3_2_1_12_1","volume-title":"Additive powers-of-two quantization: An efficient non-uniform discretization for neural networks,\" in International Conference on Learning Representations","author":"Li Y.","year":"2019","unstructured":"Y. Li, X. Dong, and W. Wang, \"Additive powers-of-two quantization: An efficient non-uniform discretization for neural networks,\" in International Conference on Learning Representations, 2019."},{"key":"e_1_3_2_1_13_1","volume-title":"Non-uniform step size quantization for accurate post-training quantization,\" in Proceedings of the 17th European Conference on Computer Vision (ECCV)","author":"Oh S.","year":"2022","unstructured":"S. Oh, H. Sim, J. Kim, and J. Lee, \"Non-uniform step size quantization for accurate post-training quantization,\" in Proceedings of the 17th European Conference on Computer Vision (ECCV). Springer International Publishing, 2022."},{"key":"e_1_3_2_1_14_1","first-page":"742","article-title":"Automated log-scale quantization for low-cost deep neural networks","author":"Oh S.","year":"2021","unstructured":"S. Oh, H. Sim, S. Lee, and J. Lee, \"Automated log-scale quantization for low-cost deep neural networks,\" in Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2021, pp. 742--751.","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"e_1_3_2_1_15_1","first-page":"1","volume-title":"IEEE","author":"Lee S.","year":"2019","unstructured":"S. Lee, H. Sim, J. Choi, and J. Lee, \"Successive log quantization for cost-efficient neural networks using stochastic computing,\" in 2019 56th ACM\/IEEE Design Automation Conference (DAC). IEEE, 2019, pp. 1--6."},{"key":"e_1_3_2_1_16_1","first-page":"451","volume-title":"NIPS'18","author":"Drumond M.","year":"2018","unstructured":"M. Drumond, T. Lin, M. Jaggi, and B. Falsafi, \"Training dnns with hybrid block floating point,\" in Proceedings of the 32nd International Conference on Neural Information Processing Systems, ser. NIPS'18. Red Hook, NY, USA: Curran Associates Inc., 2018, p. 451--461."},{"key":"e_1_3_2_1_17_1","first-page":"764","volume-title":"Bit fusion: Bit-level dynamically composable architecture for accelerating deep neural network,\" in 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA)","author":"Sharma H.","year":"2018","unstructured":"H. Sharma, J. Park, N. Suda, L. Lai, B. Chau, J. K. Kim, V. Chandra, and H. Esmaeilzadeh, \"Bit fusion: Bit-level dynamically composable architecture for accelerating deep neural network,\" in 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA), 2018, pp. 764--775."},{"key":"e_1_3_2_1_18_1","first-page":"1","volume-title":"Jun. 2018","author":"Sim H.","unstructured":"H. Sim, S. Kenzhegulov, and J. Lee**, \"DPS: Dynamic precision scaling for stochastic computing-based deep neural networks,\" in the 55th Annual ACM\/IEEE Design Automation Conference (DAC), Jun. 2018, pp. 13:1--13:6."},{"key":"e_1_3_2_1_19_1","first-page":"353","volume-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding,\" in Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP","author":"Wang A.","year":"2018","unstructured":"A. Wang, A. Singh, J. Michael, F. Hill, O. Levy, and S. Bowman, \"GLUE: A multi-task benchmark and analysis platform for natural language understanding,\" in Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP, T. Linzen, G. Chrupa\u0142a, and A. Alishahi, Eds. Brussels, Belgium: Association for Computational Linguistics, Nov. 2018, pp. 353--355. [Online]. Available: https:\/\/aclanthology.org\/W18-5446"},{"key":"e_1_3_2_1_20_1","first-page":"651","article-title":"Fp8 quantization: The power of the exponent","volume":"35","author":"Kuzmin A.","year":"2022","unstructured":"A. Kuzmin, M. Van Baalen, Y. Ren, M. Nagel, J. Peters, and T. Blankevoort, \"Fp8 quantization: The power of the exponent,\" Advances in Neural Information Processing Systems, vol. 35, pp. 14 651--14 662, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_21_1","DOI":"10.1515\/9783110203196"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1016\/0743-7315(88)90020-2"},{"key":"e_1_3_2_1_23_1","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He K.","year":"2016","unstructured":"K. He, X. Zhang, S. Ren, and J. Sun, \"Deep residual learning for image recognition,\" in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016, pp. 770--778.","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"e_1_3_2_1_24_1","first-page":"1314","volume-title":"Searching for mobilenetv3,\" in 2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Howard A.","year":"2019","unstructured":"A. Howard, M. Sandler, B. Chen, W. Wang, L.-C. Chen, M. Tan, G. Chu, V. Vasudevan, Y. Zhu, R. Pang, H. Adam, and Q. Le, \"Searching for mobilenetv3,\" in 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), 2019, pp. 1314--1324."},{"key":"e_1_3_2_1_25_1","first-page":"934","article-title":"Efficientformer: Vision transformers at mobilenet speed","volume":"35","author":"Li Y.","year":"2022","unstructured":"Y. Li, G. Yuan, Y. Wen, J. Hu, G. Evangelidis, S. Tulyakov, Y. Wang, and J. Ren, \"Efficientformer: Vision transformers at mobilenet speed,\" Advances in Neural Information Processing Systems, vol. 35, pp. 12 934--12 949, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_26_1","DOI":"10.18653\/v1\/n19-1423"},{"volume-title":"PyTorch's computer vision library,\" https:\/\/github.com\/pytorch\/vision","year":"2016","unstructured":"\"TorchVision: PyTorch's computer vision library,\" https:\/\/github.com\/pytorch\/vision, 2016.","key":"e_1_3_2_1_27_1"},{"key":"e_1_3_2_1_28_1","first-page":"38","volume-title":"Eds. Online: Association for Computational Linguistics","author":"Wolf T.","year":"2020","unstructured":"T. Wolf, L. Debut, V. Sanh, J. Chaumond, C. Delangue, A. Moi, P. Cistac, T. Rault, R. Louf, M. Funtowicz, J. Davison, S. Shleifer, P. von Platen, C. Ma, Y. Jernite, J. Plu, C. Xu, T. Le Scao, S. Gugger, M. Drame, Q. Lhoest, and A. Rush, \"Transformers: State-of-the-art natural language processing,\" in Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, Q. Liu and D. Schlangen, Eds. Online: Association for Computational Linguistics, Oct. 2020, pp. 38--45. [Online]. Available: https:\/\/aclanthology.org\/2020.emnlp-demos.6"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_29_1","DOI":"10.1145\/2654822.2541967"},{"key":"e_1_3_2_1_30_1","first-page":"15","volume-title":"IEEE","author":"Jang J.-W.","year":"2021","unstructured":"J.-W. Jang, S. Lee, D. Kim, H. Park, A. S. Ardestani, Y. Choi, C. Kim, Y. Kim, H. Yu, H. Abdel-Aziz et al., \"Sparsity-aware and re-configurable npu architecture for samsung flagship mobile soc,\" in 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA). IEEE, 2021, pp. 15--28."},{"volume-title":"Performance indicators in multiobjective optimization,\" European journal of operational research","author":"Audet C.","unstructured":"C. Audet, J. Bigeon, D. Cartier, S. Le Digabel, and L. Salomon, \"Performance indicators in multiobjective optimization,\" European journal of operational research, vol. 292, no. 2, pp. 397--422, 2021.","key":"e_1_3_2_1_31_1"},{"key":"e_1_3_2_1_32_1","first-page":"530","volume-title":"IEEE","author":"Ishibuchi H.","year":"2009","unstructured":"H. Ishibuchi, N. Tsukamoto, Y. Sakane, and Y. Nojima, \"Hypervolume approximation using achievement scalarizing functions for evolutionary many-objective optimization,\" in 2009 IEEE Congress on Evolutionary Computation. IEEE, 2009, pp. 530--537."}],"event":{"sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CAS","IEEE CEDA","IEEE EDS"],"acronym":"ICCAD '24","name":"ICCAD '24: 43rd IEEE\/ACM International Conference on Computer-Aided Design","location":"Newark Liberty International Airport Marriott New York NY USA"},"container-title":["Proceedings of the 43rd IEEE\/ACM International Conference on Computer-Aided Design"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676536.3676790","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676536.3676790","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:44Z","timestamp":1750295924000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676536.3676790"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":32,"alternative-id":["10.1145\/3676536.3676790","10.1145\/3676536"],"URL":"https:\/\/doi.org\/10.1145\/3676536.3676790","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2025-04-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}