{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T14:47:49Z","timestamp":1776955669924,"version":"3.51.4"},"reference-count":78,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002809","name":"Generalitat de Catalunya","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007170","name":"Ministry of Economy","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100007170","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1109\/hpca56546.2023.10071076","type":"proceedings-article","created":{"date-parts":[[2023,3,24]],"date-time":"2023-03-24T17:42:55Z","timestamp":1679679775000},"page":"1085-1098","source":"Crossref","is-referenced-by-count":29,"title":["Mix-GEMM: An efficient HW-SW Architecture for Mixed-Precision Quantized Deep Neural Networks Inference on Edge Devices"],"prefix":"10.1109","author":[{"given":"Enrico","family":"Reggiani","sequence":"first","affiliation":[{"name":"Polytechnic University of Catalonia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alessandro","family":"Pappalardo","sequence":"additional","affiliation":[{"name":"AMD AECG Research Labs,Dublin,Ireland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Max","family":"Doblas","sequence":"additional","affiliation":[{"name":"Polytechnic University of Catalonia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Miquel","family":"Moreto","sequence":"additional","affiliation":[{"name":"Polytechnic University of Catalonia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mauro","family":"Olivieri","sequence":"additional","affiliation":[{"name":"Barcelona Supercomputing Center"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Osman Sabri","family":"Unsal","sequence":"additional","affiliation":[{"name":"Barcelona Supercomputing Center"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adri\u00e1n","family":"Cristal","sequence":"additional","affiliation":[{"name":"Polytechnic University of Catalonia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Deep learning networks"},{"key":"ref2","article-title":"PyTorch Conv2d layer"},{"key":"ref3","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"Abadi"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00345"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2017.2682138"},{"key":"ref6","first-page":"1074","article-title":"Batchquant: Quantized-for-all architecture search with robust quantizer","volume":"34","author":"Bai","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","article-title":"Onnx: Open neural network exchange","author":"Bai","year":"2019"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00356"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/0885-064X(86)90001-4"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3242897"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3387902.3394038"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2021.3066883"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2020.2983648"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/S0747-7171(89)80004-5"},{"key":"ref15","article-title":"High performance convolutional neural networks for document processing","volume-title":"Tenth international workshop on frontiers in handwriting recognition","author":"Chellapilla"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2019.2921977"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2016.2616357"},{"key":"ref18","article-title":"cudnn: Efficient primitives for deep learning","author":"Chetlur","year":"2014"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.2976475"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-5209-5"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ipdpsw50202.2020.00154"},{"key":"ref23","article-title":"Qnnpack: Open source library for optimized mobile deep learning","author":"Dukhan","year":"2018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1962"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2018.8445101"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1098\/rsta.2019.0155"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2021.3072337"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00069"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1201\/9781003162810-13"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2021.3069952"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref32","article-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications","author":"Howard","year":"2017"},{"key":"ref33","article-title":"gemmlowp: A small self-contained low-precision gemm library","author":"Jacob","year":"2022"},{"key":"ref34","first-page":"112","article-title":"Trained quantization thresholds for accurate and efficient fixed-point inference of deep neural networks","volume-title":"Proceedings of Machine Learning and Systems","volume":"2","author":"Jain"},{"key":"ref35","article-title":"Low-bit quantization of neural networks for efficient inference","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops","author":"Kravchik"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3243473"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.435"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/355841.355847"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2018.2865489"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370335"},{"key":"ref43","article-title":"Brecq: Pushing the limit of post-training quantization by block reconstruction","author":"Li","year":"2021"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.07.045"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/2925987"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874254"},{"key":"ref47","article-title":"Fast training of convolutional networks through ffts","author":"Mathieu","year":"2014","journal-title":"CoRR"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/tpds.2022.3146257"},{"key":"ref49","article-title":"Efficient winograd convolution via integer arithmetic","author":"Meng","year":"2019"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00141"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2896880"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI49217.2020.000-5"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-13866-8"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/0898-1221(93)90144-K"},{"key":"ref55","article-title":"Xilinx\/brevitas","author":"Pappalardo","year":"2021"},{"key":"ref56","first-page":"8024","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume-title":"Advances in Neural Information Processing Systems 32","author":"Paszke","year":"2019"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3461478"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507746"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS51556.2021.9401196"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-11607-9_1"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2020.2987314"},{"key":"ref69","article-title":"Degree-quant: Quantization-aware training for graph neural networks","volume-title":"International Conference on Learning Representations","author":"Tailor"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00529"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"ref66","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2015","journal-title":"CoRR"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.110"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/DSD57027.2022.00042"},{"key":"ref70","first-page":"6105","article-title":"Efficientnet: Rethinking model scaling for convolutional neural networks","volume-title":"International conference on machine learning","author":"Tan"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00075"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1117\/12.2587045"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1145\/2755561"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/2764454"},{"key":"ref75","first-page":"9847","article-title":"Towards accurate post-training network quantization via bit-split and stitching","volume-title":"International Conference on Machine Learning","author":"Wang"},{"key":"ref76","article-title":"Integer quantization for deep learning inference: Principles and empirical evaluation","author":"Wu","year":"2020"},{"key":"ref77","article-title":"Openblas","volume":"88","author":"Xianyi","year":"2012"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3158966"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC53511.2021.00029"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2019.2960488"}],"event":{"name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","location":"Montreal, QC, Canada","start":{"date-parts":[[2023,2,25]]},"end":{"date-parts":[[2023,3,1]]}},"container-title":["2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10070856\/10070923\/10071076.pdf?arnumber=10071076","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T13:03:29Z","timestamp":1707829409000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10071076\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2]]},"references-count":78,"URL":"https:\/\/doi.org\/10.1109\/hpca56546.2023.10071076","relation":{},"subject":[],"published":{"date-parts":[[2023,2]]}}}