{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T18:10:08Z","timestamp":1747246208479,"version":"3.40.5"},"reference-count":45,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Eindhoven Engine"},{"DOI":"10.13039\/100017850","name":"NXP Semiconductors","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100017850","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Brainport Eindhoven"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3567046","type":"journal-article","created":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T17:57:11Z","timestamp":1746467831000},"page":"81434-81449","source":"Crossref","is-referenced-by-count":0,"title":["POQ: Is There a Pareto-Optimal Quantization Strategy for Deep Neural Networks?"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9369-6532","authenticated-orcid":false,"given":"Floran","family":"De Putter","sequence":"first","affiliation":[{"name":"Department of Electrical Engineering, Electronic Systems Group, Eindhoven University of Technology, Eindhoven, The Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8210-2323","authenticated-orcid":false,"given":"Sherif","family":"Eissa","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, Electronic Systems Group, Eindhoven University of Technology, Eindhoven, The Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4506-5732","authenticated-orcid":false,"given":"Henk","family":"Corporaal","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, Electronic Systems Group, Eindhoven University of Technology, Eindhoven, The Netherlands"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref2","article-title":"A white paper on neural network quantization","author":"Nagel","year":"2021","journal-title":"arXiv:2106.08295"},{"key":"ref3","first-page":"3085","article-title":"Pareto-optimal quantized ResNet is mostly 4-bit","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. Workshops (CVPRW)","author":"Abdolrashidi"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-68238-5_7"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.2017.8335699"},{"key":"ref6","first-page":"1","article-title":"Pruning vs quantization: Which is better?","volume-title":"Proc. 37th Int. Conf. Neural Inf. Process. Syst.","author":"Kuzmin"},{"key":"ref7","first-page":"373","article-title":"Quantization: How far should we go?","volume-title":"Proc. 25th Euromicro Conf. Digit. Syst. Design (DSD)","author":"De Putter"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"9785","DOI":"10.1109\/ACCESS.2021.3050670","article-title":"Efficiency versus accuracy: A review of design techniques for DNN hardware accelerators","volume":"9","author":"Latotzke","year":"2021","journal-title":"IEEE Access"},{"key":"ref9","first-page":"4510","article-title":"MobileNetV2: Inverted residuals and linear bottlenecks","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit.","author":"Sandler"},{"key":"ref10","first-page":"1","article-title":"Learned step size quantization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Esser"},{"key":"ref11","first-page":"1","article-title":"MQBench: Towards reproducible and deployable model quantization benchmark","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"ref13","first-page":"873","article-title":"VS-quant: Per-vector scaled quantization for accurate low-precision neural network inference","volume-title":"Proc. Mach. Learn. Syst.","author":"Dai"},{"key":"ref14","first-page":"16","article-title":"A 17\u201395.6 TOPS\/W deep learning inference accelerator with per-vector scaled 4-bit quantization for transformers in 5nm","volume-title":"Proc. IEEE Symp. VLSI Technol. Circuits (VLSI Technol. Circuits)","author":"Keller"},{"key":"ref15","first-page":"578","article-title":"TVM: An automated end-to-end optimizing compiler for deep learning","volume-title":"Proc. 13th USENIX Conf. Operating Syst. Design Implement.","author":"Chen"},{"key":"ref16","article-title":"FBGEMM: Enabling high-performance low-precision deep learning inference","author":"Khudia","year":"2021","journal-title":"arXiv:2101.05615"},{"key":"ref17","first-page":"355","article-title":"Stream: A modeling framework for fine-grained layer fusion on multi-core DNN accelerators","volume-title":"Proc. IEEE Int. Symp. Perform. Anal. Syst. Softw. (ISPASS)","author":"Symons"},{"issue":"8","key":"ref18","doi-asserted-by":"crossref","first-page":"1160","DOI":"10.1109\/TC.2021.3059962","article-title":"ZigZag: Enlarging joint architecture-mapping design space exploration for DNN accelerators","volume":"70","author":"Mei","year":"2021","journal-title":"IEEE Trans. Comput."},{"key":"ref19","first-page":"01","article-title":"System-level design and integration of a prototype AR\/VR hardware featuring a custom low-power DNN accelerator chip in 7nm technology for codec avatars","volume-title":"Proc. IEEE Custom Integr. Circuits Conf. (CICC)","author":"Sumbul"},{"key":"ref20","first-page":"363","article-title":"Understanding the energy consumption of dynamic random access memories","volume-title":"Proc. 43rd Annu. IEEE\/ACM Int. Symp. Microarchitecture","author":"Vogelsang"},{"key":"ref21","first-page":"751","article-title":"Tetris","volume-title":"Proc. 22nd Int. Conf. Architectural Support Program. Lang. Operating Syst.","author":"L\u00fc"},{"issue":"11","key":"ref22","doi-asserted-by":"crossref","first-page":"2461","DOI":"10.1109\/TCSVT.2016.2592330","article-title":"Origami: A 803-GOp\/s\/W convolutional network accelerator","volume":"27","author":"Cavigelli","year":"2017","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"ref23","first-page":"1","article-title":"Ten lessons from three generations shaped Google\u2019s TPUv4i: Industrial product","volume-title":"Proc. ACM\/IEEE 48th Annu. Int. Symp. Comput. Archit. (ISCA)","author":"Jouppi"},{"key":"ref24","first-page":"248","article-title":"ImageNet: A large-scale hierarchical image database","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit.","author":"Deng"},{"key":"ref25","first-page":"770","article-title":"Deep residual learning for image recognition","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR)","author":"He"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-39932-9_5"},{"key":"ref27","first-page":"747","article-title":"Bi-real net: Enhancing the performance of 1-bit CNNs with improved representational capability and advanced training algorithm","volume-title":"Proc. Eur. Conf. Comput. Vis. (ECCV)","author":"Liu"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_9"},{"key":"ref29","first-page":"12465","article-title":"PokeBNN: A binary pursuit of lightweight accuracy","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)","author":"Zhang"},{"key":"ref30","first-page":"1","article-title":"Training binary neural networks with real-to-binary convolutions","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Mart\u00ednez"},{"key":"ref31","first-page":"78","article-title":"BrainTTA: A 28.6 TOPS\/W compiler programmable transport-triggered NN SoC","volume-title":"Proc. IEEE 41st Int. Conf. Comput. Design (ICCD)","author":"Molendijk"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/VLSIT.2012.6242474"},{"issue":"2","key":"ref33","doi-asserted-by":"crossref","first-page":"692","DOI":"10.1109\/TCSI.2020.3037892","article-title":"A 96-MB 3D-stacked SRAM using inductive coupling with 0.4-V transmitter, termination scheme and 12:1 SerDes in 40-nm CMOS","volume":"68","author":"Shiba","year":"2021","journal-title":"IEEE Trans. Circuits Syst. I, Reg. Papers"},{"key":"ref34","first-page":"340","article-title":"RANA: Towards efficient neural acceleration with refresh-optimized embedded DRAM","volume-title":"Proc. ACM\/IEEE 45th Annu. Int. Symp. Comput. Archit. (ISCA)","author":"Tu"},{"key":"ref35","first-page":"1","article-title":"The efficiency misnomer","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Dehghani"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"volume-title":"Segmentation Models Pytorch","year":"2019","author":"Iakubovskii","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"volume-title":"Open Compute Project \u2022 OCP Microscaling Formats (MX) Specification OCP Microscaling Formats (MX) Specification","year":"2023","author":"Rouhani et al","key":"ref39"},{"volume-title":"NVIDIA Blackwell Architecture Technical Overview","year":"2025","key":"ref40"},{"key":"ref41","article-title":"XNOR-Net++: Improved binary neural networks","author":"Bulat","year":"2019","journal-title":"arXiv:1909.13863"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00517"},{"key":"ref43","article-title":"1-bit AI infra: Part 1.1, fast and lossless BitNet b1.58 inference on CPUs","author":"Wang","year":"2024","journal-title":"arXiv:2410.16144"},{"key":"ref44","first-page":"1","article-title":"MobileViT: Light-weight, general-purpose, and mobile-friendly vision transformer","volume-title":"Proc. 10th Int. Conf. Learn. Represent.","author":"Mehta"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/SOCC62300.2024.10737844"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/10988610.pdf?arnumber=10988610","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T17:33:34Z","timestamp":1747244014000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10988610\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3567046","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2025]]}}}