{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T14:23:34Z","timestamp":1773843814978,"version":"3.50.1"},"reference-count":52,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Spanish Agencia Estatal de Investigaci&#x00F3;n","award":["FUN4DATE (PID2022-136684OB-C22)"],"award-info":[{"award-number":["FUN4DATE (PID2022-136684OB-C22)"]}]},{"name":"Spanish Agencia Estatal de Investigaci&#x00F3;n","award":["SMARTY (PCI2024-153434)"],"award-info":[{"award-number":["SMARTY (PCI2024-153434)"]}]},{"name":"TUCAN6-CM","award":["(TEC-2024\/COM-460)"],"award-info":[{"award-number":["(TEC-2024\/COM-460)"]}]},{"name":"CM","award":["(ORDEN 5696\/2024)"],"award-info":[{"award-number":["(ORDEN 5696\/2024)"]}]},{"name":"European Commission through the Chips Act Joint Undertaking project SMARTY","award":["101140087"],"award-info":[{"award-number":["101140087"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62474030"],"award-info":[{"award-number":["62474030"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Artif. Intel."],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1109\/tcasai.2025.3569511","type":"journal-article","created":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T13:46:21Z","timestamp":1747143981000},"page":"248-261","source":"Crossref","is-referenced-by-count":1,"title":["A Configurable Floating-Point Fused Multiply-Add Design With Mixed Precision for AI Accelerators"],"prefix":"10.1109","volume":"2","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4062-3638","authenticated-orcid":false,"given":"Farzad","family":"Niknia","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, Northeastern University, Boston, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9668-7318","authenticated-orcid":false,"given":"Ziheng","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Northeastern University, Boston, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6226-2880","authenticated-orcid":false,"given":"Shanshan","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2540-5234","authenticated-orcid":false,"given":"Pedro","family":"Reviriego","sequence":"additional","affiliation":[{"name":"Departamento de Ingenier&#x00ED;a de Sistemas Telem&#x00E1;ticos, Escuela T&#x00E9;cnica Superior de Ingenier&#x00ED;a de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9887-1418","authenticated-orcid":false,"given":"Zhen","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2563-2250","authenticated-orcid":false,"given":"Paolo","family":"Montuschi","sequence":"additional","affiliation":[{"name":"Dipartimento di Automatica e Informatica, Politecnico di Torino, Torina, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3152-3245","authenticated-orcid":false,"given":"Fabrizio","family":"Lombardi","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Northeastern University, Boston, MA, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aaa8415"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3059968"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3578938"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCC57788.2023.10233394"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/0743-7315(92)90068-X"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.heliyon.2018.e00938"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3233300"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MCAS.2021.3092533"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TNANO.2024.3367916"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.2007.4487224"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2005.22"},{"key":"ref12","article-title":"Nvidia hopper architecture in-depth","author":"Michael","year":"2025"},{"key":"ref13","volume-title":"Intel\u00ae 64 and IA-32 Archit.s Optim. Reference Manual","year":"2023"},{"key":"ref14","volume-title":"5TH Gen AMD EPYC Processor Architecture","year":"2024"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.2011.6189977"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA52953.2021.00161"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MNANO.2022.3208757"},{"key":"ref18","first-page":"1737","article-title":"Deep learning with limited numerical precision","volume-title":"Proc. Int. Conf. Mach. Learn. (PMLR)","author":"Gupta","year":"2015"},{"key":"ref19","first-page":"7675","article-title":"Training deep neural networks with 8-bit floating point numbers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wang","year":"2018"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2019.00023"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2019.00022"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.330"},{"key":"ref23","article-title":"FP8 formats for deep learning","author":"Micikevicius","year":"2022"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3316279.3316282"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3583781.3590269"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2022.3161005"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2018.8351354"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2016.2584067"},{"key":"ref29","article-title":"Hybrid 8-bit floating point (HFP8) training and inference for deep neural networks","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Sun","year":"2019"},{"key":"ref30","article-title":"Mixed precision training","author":"Micikevicius","year":"2017"},{"key":"ref31","article-title":"A study of bfloat16 for deep learning training","author":"Kalamkar","year":"2019"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-8176-4705-6"},{"key":"ref33","volume-title":"754\u20132008 IEEE Standard for Floating-Point Arithmetic","year":"2008"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1147\/rd.341.0059"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2004.44"},{"key":"ref36","article-title":"Mixed precision training with 8-bit floating point","author":"Mellempudi","year":"2019"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00069"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071076"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00095"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00021"},{"key":"ref41","first-page":"1002","article-title":"Bucket getter: A bucket-based processing engine for low-bit block floating point (BFP) DNNs","volume-title":"Proc. 56th Annu. IEEE\/ACM Int. Symp. Microarchit.","author":"Lo","year":"2023"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3587095"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2001.930098"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/icecs.2010.5724440"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2007.5"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.micpro.2017.12.009"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/j.mejo.2015.10.012"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2019.2895031"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2021.3128435"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2022.3226185"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2024.3497724"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2024.3359678"}],"container-title":["IEEE Transactions on Circuits and Systems for Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10495160\/11159306\/11002754.pdf?arnumber=11002754","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T18:32:14Z","timestamp":1765305134000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11002754\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9]]},"references-count":52,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tcasai.2025.3569511","relation":{},"ISSN":["2996-6647"],"issn-type":[{"value":"2996-6647","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9]]}}}