{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T18:38:22Z","timestamp":1779907102565,"version":"3.53.1"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T00:00:00Z","timestamp":1754438400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T00:00:00Z","timestamp":1754438400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003836","name":"IC Design Education Center","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003836","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,6]]},"DOI":"10.1109\/islped65674.2025.11261809","type":"proceedings-article","created":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T18:39:13Z","timestamp":1764787153000},"page":"1-7","source":"Crossref","is-referenced-by-count":2,"title":["Jack Unit: An Area- and Energy-Efficient Multiply-Accumulate (MAC) Unit Supporting Diverse Data Formats"],"prefix":"10.1109","author":[{"given":"Seock-Hwan","family":"Noh","sequence":"first","affiliation":[{"name":"DGIST,Daegu,Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sungju","family":"Kim","sequence":"additional","affiliation":[{"name":"Yonsei University,Seoul,Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Seohyun","family":"Kim","sequence":"additional","affiliation":[{"name":"DGIST,Daegu,Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Daehoon","family":"Kim","sequence":"additional","affiliation":[{"name":"Yonsei University,Seoul,Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jaeha","family":"Kung","sequence":"additional","affiliation":[{"name":"Korea University,Seoul,Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yeseong","family":"Kim","sequence":"additional","affiliation":[{"name":"DGIST,Daegu,Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/SIECPC.2011.5876905"},{"key":"ref2","volume-title":"Ethos-N78","year":"2023"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.33140\/jeee.02.04.19"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00212"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3302"},{"key":"ref6","article-title":"Microscaling data formats for deep learning","volume-title":"Proc. of NeurIPS","author":"Rouhani"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref8","article-title":"Gpt3.int8(): 8-bit matrix multiplication for transformers at scale","volume-title":"Proc. of NeurIPS","author":"Dettmers"},{"key":"ref9","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. of NAACL-HTL","author":"Devlin"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00012"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH48897.2020.00029"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-11021-5_20"},{"key":"ref13","year":"2015","journal-title":"Standard High Bandwidth Memory (HBM) DRAM"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"ref15","article-title":"A study of bfloat16 for deep learning training","author":"Kalamkar","year":"2019"},{"key":"ref16","article-title":"Fp8 quantization: The power of the exponent","volume-title":"Proc. of NeurIPS","author":"Kuzmin"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310262"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614249"},{"key":"ref20","article-title":"Pointer sentinel mixture models","author":"Merity","year":"2016"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"ref22","volume-title":"Nerf","author":"Mildenhall","year":"2020"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS48785.2022.9937437"},{"key":"ref24","volume-title":"CACTI 6.0: A Tool to Model Large Caches","author":"Muralimanohar","year":"2009"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2023.3253050"},{"key":"ref26","volume-title":"NVIDIA H100 Tensor Core GPU Architecture","year":"2022"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731639"},{"key":"ref28","year":"2023","journal-title":"Ocp microscaling formats (mx) specification version 1.0"},{"key":"ref29","article-title":"Language models are unsupervised multitask learners","author":"Radford","year":"2019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589351"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317784"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS48437.2020.00016"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196072"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00069"},{"key":"ref35","article-title":"Hybrid 8-bit floating point (hfp8) training and inference for deep neural networks","volume-title":"Proc. of NeurIPS","author":"Sun"},{"key":"ref36","year":"2016","journal-title":"IC Compiler II Implementation User Guide: Version L-2016.03"},{"key":"ref37","year":"2017","journal-title":"Design Compiler User Guide: Version N-2017.09"},{"key":"ref38","volume-title":"Synopsys primetime px power analysis solution achieves broad market adoption","year":"2024"},{"key":"ref39","doi-asserted-by":"crossref","volume-title":"Lynx: A smartnic-driven accelerator-centric architecture for network servers","author":"Tork","DOI":"10.1145\/3373376.3378528"},{"key":"ref40","author":"Touvron","year":"2023","journal-title":"Llama: Open and efficient foundation language models"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00021"},{"key":"ref42","article-title":"Stable and low-precision training for large-scale vision-language models","volume-title":"Proc. of NeurIPS","author":"Wortsman"},{"key":"ref43","article-title":"Understanding int4 quantization for language models: latency speedup, composability, and failure cases","volume-title":"Proc. of ICML","author":"Wu"}],"event":{"name":"2025 IEEE\/ACM International Symposium on Low Power Electronics and Design (ISLPED)","location":"Reykjav\u00edk, Iceland","start":{"date-parts":[[2025,8,6]]},"end":{"date-parts":[[2025,8,8]]}},"container-title":["2025 IEEE\/ACM International Symposium on Low Power Electronics and Design (ISLPED)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11261694\/11261695\/11261809.pdf?arnumber=11261809","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T07:51:18Z","timestamp":1764834678000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11261809\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,6]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/islped65674.2025.11261809","relation":{},"subject":[],"published":{"date-parts":[[2025,8,6]]}}}