{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T00:00:22Z","timestamp":1776297622020,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T00:00:00Z","timestamp":1684195200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T00:00:00Z","timestamp":1684195200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100010426","name":"UGC-DAE Consortium for Scientific Research, University Grants Commission","doi-asserted-by":"publisher","award":["22745\/(NET-DEC. 2015)"],"award-info":[{"award-number":["22745\/(NET-DEC. 2015)"]}],"id":[{"id":"10.13039\/501100010426","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2023,10]]},"DOI":"10.1007\/s00034-023-02387-2","type":"journal-article","created":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T09:02:24Z","timestamp":1684227744000},"page":"6089-6115","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Designing a Performance-Centric MAC Unit with Pipelined Architecture for DNN Accelerators"],"prefix":"10.1007","volume":"42","author":[{"given":"Gopal","family":"Raut","sequence":"first","affiliation":[]},{"given":"Jogesh","family":"Mukala","sequence":"additional","affiliation":[]},{"given":"Vishal","family":"Sharma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4223-0077","authenticated-orcid":false,"given":"Santosh Kumar","family":"Vishvakarma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,5,16]]},"reference":[{"key":"2387_CR1","doi-asserted-by":"crossref","unstructured":"R. Andraka, A survey of CORDIC algorithms for FPGA based computers. In: Proceedings of the 1998 ACM\/SIGDA sixth international symposium on field programmable gate arrays, pp. 191-200 (1998)","DOI":"10.1145\/275107.275139"},{"key":"2387_CR2","doi-asserted-by":"crossref","unstructured":"A. Ardakani, F. Leduc-Primeau, N. Onizawa, T. Hanyu, and W. J. Gross, VLSI implementation of deep neural network using integral stochastic computing. IEEE Trans Very Large Scale Integr (VLSI) Syst 25(10): 2688-2699 (2017)","DOI":"10.1109\/TVLSI.2017.2654298"},{"key":"2387_CR3","doi-asserted-by":"crossref","unstructured":"Z. Carmichael, H.F. Langroudi, C. Khazanov, J. Lillie, J. L. Gustafson, and D. Kudithipudi, Performancefficiency trade-off of low-precision numerical formats in deep neural networks. In: Proceedings of the conference for next generation arithmetic, pp. 1-9 (2019)","DOI":"10.1145\/3316279.3316282"},{"key":"2387_CR4","doi-asserted-by":"crossref","unstructured":"H. Chhajed, G. Raut, N. Dhakad, S. Vishwakarma, and S. K. Vishvakarma, BitMAC: bit-serial computation-based efficient multiply-accumulate unit for DNN accelerator. Circuits Syst Signal Process, pp. 1-16(2022)","DOI":"10.1007\/s00034-021-01873-9"},{"key":"2387_CR5","doi-asserted-by":"crossref","unstructured":"F.U.D. Farrukh, C. Zhang, Y. Jiang, Z. Zhang, Z. Wang, Z. Wang, and H. Jiang, Power efficient tiny yolo CNN using reduced hardware resources based on booth multiplier and wallace tree adders. IEEE Open J. Circuits Syst. 1 (2020): 76-87","DOI":"10.1109\/OJCAS.2020.3007334"},{"key":"2387_CR6","doi-asserted-by":"crossref","unstructured":"T. Fujii, S. Sato, H. Nakahara, and M. Motomura, An FPGA realization of a deep convolutional neural network using a threshold neuron pruning. In: Applied reconfigurable computing: 13th international symposium, ARC 2017, Delft, The Netherlands, April 3-7, 2017, Proceedings 13, pp. 268-280. Springer International Publishing (2017)","DOI":"10.1007\/978-3-319-56258-2_23"},{"key":"2387_CR7","doi-asserted-by":"crossref","unstructured":"M. Gao, Q. Wang, and G. Qu, Energy and error reduction using variable bit-width optimization on dynamic fixed point format. In: 2019 IEEE computer society annual symposium on VLSI (ISVLSI), pp. 152-157. IEEE (2019)","DOI":"10.1109\/ISVLSI.2019.00036"},{"issue":"3","key":"2387_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3233300","volume":"15","author":"J Garland","year":"2018","unstructured":"J. Garland, D. Gregg, Low complexity multiply-accumulate units for convolutional neural networks with weight-sharing. ACM Trans. Architect. Code Optim. (TACO) 15(3), 1\u201324 (2018)","journal-title":"ACM Trans. Architect. Code Optim. (TACO)"},{"key":"2387_CR9","unstructured":"D.A. Gudovskiy, and L. Rigazio, Shiftcnn: generalized low-precision architecture for inference of convolutional neural networks. arXiv preprint arXiv:1706.02393 (2017)"},{"key":"2387_CR10","doi-asserted-by":"crossref","unstructured":"M.A. Hanif, R. Hafiz, and M. Shafique, Error resilience analysis for systematically employing approximate computing in convolutional neural networks. In: 2018 design, automation and test in Europe conference and exhibition (DATE), pp. 913-916. IEEE (2018)","DOI":"10.23919\/DATE.2018.8342139"},{"key":"2387_CR11","unstructured":"https:\/\/www.synopsys.com\/implementation-and-signoff\/rtl-synthesis-test\/design-compiler-graphical.html"},{"key":"2387_CR12","unstructured":"ISO\/IEC\/IEEE International Standard - Floating-point arithmetic, ISO\/IEC 60559:2020(E) IEEE Std 754-2019, pp.1-86 (2020)"},{"key":"2387_CR13","doi-asserted-by":"crossref","unstructured":"S. Jain, S. Venkataramani, V. Srinivasan, J. Choi, P. Chuang, and L. Chang, Compensated-DNN: energy efficient low-precision deep neural networks by compensating quantization errors. In: Proceedings of the 55th annual design automation conference, pp. 1-6 (2018)","DOI":"10.1145\/3195970.3196012"},{"issue":"1","key":"2387_CR14","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1109\/TCSI.2018.2856245","volume":"66","author":"H Jiang","year":"2018","unstructured":"H. Jiang, C. Liu, F. Lombardi, J. Han, Low-power approximate unsigned multipliers with configurable error recovery. IEEE Trans. Circuits Syst. I Regul. Pap. 66(1), 189\u2013202 (2018)","journal-title":"IEEE Trans. Circuits Syst. I Regul. Pap."},{"key":"2387_CR15","doi-asserted-by":"crossref","unstructured":"R.B. S. Kesava, B. L. Rao, K. B. Sindhuri, and N. U. Kumar, Low power and area efficient Wallace tree multiplier using carry select adder with binary to excess-1 converter. In: 2016 conference on advances in signal processing (CASP), pp. 248-253. IEEE (2016)","DOI":"10.1109\/CASP.2016.7746174"},{"issue":"7553","key":"2387_CR16","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"Y. LeCun, Y. Bengio, G. Hinton, Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"key":"2387_CR17","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.neucom.2016.12.038","volume":"234","author":"W Liu","year":"2017","unstructured":"W. Liu, Z. Wang, X. Liu, N. Zeng, Y. Liu, F.E. Alsaadi, A survey of deep neural network architectures and their applications. Neurocomputing 234, 11\u201326 (2017)","journal-title":"Neurocomputing"},{"key":"2387_CR18","doi-asserted-by":"publisher","first-page":"147129","DOI":"10.1109\/ACCESS.2019.2946513","volume":"7","author":"M Masadeh","year":"2019","unstructured":"M. Masadeh, O. Hasan, S. Tahar, Input-conscious approximate multiply-accumulate (mac) unit for energy-efficiency. IEEE Access 7, 147129\u2013147142 (2019)","journal-title":"IEEE Access"},{"issue":"4","key":"2387_CR19","doi-asserted-by":"publisher","first-page":"532","DOI":"10.1109\/JETCAS.2021.3129415","volume":"11","author":"AN Mazumder","year":"2021","unstructured":"A.N. Mazumder, J. Meng, H.A. Rashid, U. Kallakuri, X. Zhang, J.S. Seo, T. Mohsenin, A survey on the optimization of neural network accelerators for micro-ai on-device inference. IEEE J. Emerging Sel. Topics Circuits Syst. 11(4), 532\u2013547 (2021)","journal-title":"IEEE J. Emerging Sel. Topics Circuits Syst."},{"key":"2387_CR20","doi-asserted-by":"crossref","unstructured":"L. Mei, M. Dandekar, D. Rodopoulos, J. Constantin, P. Debacker, R. Lauwereins, and M. Verhelst, Sub-word parallel precision-scalable MAC engines for efficient embedded DNN inference. In: 2019 IEEE international conference on artificial intelligence circuits and systems (AICAS), pp. 6-10. IEEE (2019)","DOI":"10.1109\/AICAS.2019.8771481"},{"key":"2387_CR21","doi-asserted-by":"crossref","unstructured":"E. Monmasson, L. Idkhajine, M. N. Cirstea, I. Bahri, A. Tisan, and M. W. Naouar, FPGAs in industrial control applications. IEEE Trans. Ind. Inf. 7(2): 224-243 (2011)","DOI":"10.1109\/TII.2011.2123908"},{"key":"2387_CR22","doi-asserted-by":"crossref","unstructured":"V. Mrazek, L. Sekanina, and Z. Vasicek, Libraries of approximate circuits: automated design and application in CNN accelerators. IEEE J. Emerging Sel. Topics Circuits Syst. 10(4): 406-418 (2020)","DOI":"10.1109\/JETCAS.2020.3032495"},{"key":"2387_CR23","doi-asserted-by":"crossref","unstructured":"H. Nakahara, and T. Sasao, A high-speed low-power deep neural network on an FPGA based on the nested RNS: applied to an object detector. In: 2018 IEEE international symposium on circuits and systems (ISCAS), pp. 1-5. IEEE (2018)","DOI":"10.1109\/ISCAS.2018.8351850"},{"key":"2387_CR24","doi-asserted-by":"crossref","unstructured":"M. Nazemi, G. Pasandi, and M. Pedram, Energy-efficient, low-latency realization of neural networks through Boolean logic minimization. In: Proceedings of the 24th Asia and South Pacific design automation conference, pp. 274-279 (2019)","DOI":"10.1145\/3287624.3287722"},{"key":"2387_CR25","doi-asserted-by":"crossref","unstructured":"V. Rajagopal, C. K. Ramasamy, A. Vishnoi, R. N. Gadde, N. R. Miniskar, and S. K. Pasupuleti, Accurate and efficient fixed point inference for deep neural networks. In: 2018 25th IEEE international conference on image processing (ICIP), pp. 1847-1851. IEEE (2018)","DOI":"10.1109\/ICIP.2018.8451268"},{"key":"2387_CR26","doi-asserted-by":"crossref","unstructured":"G. Raut, S. Rai, S. K. Vishvakarma, and A. Kumar, A CORDIC based configurable activation function for ANN applications. In: 2020 IEEE computer society annual symposium on VLSI (ISVLSI), pp. 78-83. IEEE (2020)","DOI":"10.1109\/ISVLSI49217.2020.00024"},{"key":"2387_CR27","doi-asserted-by":"crossref","unstructured":"G. Raut, A. Biasizzo, N. Dhakad, N. Gupta, G. Papa, and S. K. Vishvakarma, Data multiplexed and hardware reused architecture for deep neural network accelerator. Neurocomputing 486: 147-159 (2022)","DOI":"10.1016\/j.neucom.2021.11.018"},{"key":"2387_CR28","doi-asserted-by":"publisher","first-page":"170","DOI":"10.1109\/OJCAS.2020.3042743","volume":"2","author":"G Raut","year":"2021","unstructured":"G. Raut, S. Rai, S.K. Vishvakarma, A. Kumar, RECON: resource-efficient CORDIC-based neuron architecture. IEEE Open J. Circuits Syst. 2, 170\u2013181 (2021)","journal-title":"IEEE Open J. Circuits Syst."},{"key":"2387_CR29","doi-asserted-by":"crossref","unstructured":"T. Sato, and T. Ukezono, A dynamically configurable approximate array multiplier with exact mode. In: 2020 5th international conference on computer and communication systems (ICCCS), pp. 917-921. IEEE (2020)","DOI":"10.1109\/ICCCS49078.2020.9118432"},{"key":"2387_CR30","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1016\/j.neunet.2019.04.017","volume":"117","author":"H Sim","year":"2019","unstructured":"H. Sim, J. Lee, Cost-effective stochastic MAC circuits for deep neural networks. Neural Netw. 117, 152\u2013162 (2019)","journal-title":"Neural Netw."},{"issue":"12","key":"2387_CR31","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1109\/JPROC.2017.2761740","volume":"105","author":"V Sze","year":"2017","unstructured":"V. Sze, Y.-H. Chen, T.-J. Yang, J.S. Emer, Efficient processing of deep neural networks: a tutorial and survey. Proc. IEEE 105(12), 2295\u20132329 (2017)","journal-title":"Proc. IEEE"},{"key":"2387_CR32","doi-asserted-by":"crossref","unstructured":"R. Thomas, V. DeBrunner, and L. DeBrunner, Fixed-point implementation of discrete Hirschman transform. In: 2018 52nd Asilomar conference on signals, systems, and computers, pp. 1507-1511. IEEE (2018)","DOI":"10.1109\/ACSSC.2018.8645136"},{"key":"2387_CR33","doi-asserted-by":"crossref","unstructured":"A. Thomas, G. Trivedi, and P. Guha, Design of a low power bfloat16 pipelined mac unit for deep neural network applications. In: 2021 IEEE region 10 symposium (TENSYMP), pp. 1-8. IEEE (2021)","DOI":"10.1109\/TENSYMP52854.2021.9550912"},{"key":"2387_CR34","doi-asserted-by":"crossref","unstructured":"Y. Umuroglu, N. J. Fraser, G. Gambardella, M. Blott, P. Leong, M. Jahre, and K. Vissers, Finn: a framework for fast, scalable binarized neural network inference. In: Proceedings of the 2017 ACM\/SIGDA international symposium on field-programmable gate arrays, pp. 65-74 (2017)","DOI":"10.1145\/3020078.3021744"},{"key":"2387_CR35","doi-asserted-by":"crossref","unstructured":"A. Vamsi, S. Krishna, and S. R. Ramesh, An efficient design of 16 bit mac unit using vedic mathematics. In: 2019 international conference on communication and signal processing (ICCSP), pp. 0319-0322. IEEE (2019)","DOI":"10.1109\/ICCSP.2019.8697985"},{"key":"2387_CR36","doi-asserted-by":"publisher","first-page":"25481","DOI":"10.1109\/ACCESS.2020.2970968","volume":"8","author":"N Van Toan","year":"2020","unstructured":"N. Van Toan, J.-G. Lee, FPGA-based multi-level approximate multipliers for high-performance error-resilient applications. IEEE Access 8, 25481\u201325497 (2020)","journal-title":"IEEE Access"},{"issue":"6","key":"2387_CR37","first-page":"1733","volume":"55","author":"S Yin","year":"2020","unstructured":"S. Yin, Z. Jiang, J.-S. Seo, M. Seok, XNOR-SRAM: in-memory computing SRAM macro for binary\/ternary deep neural networks. IEEE J. Solid-State Circuits 55(6), 1733\u20131743 (2020)","journal-title":"IEEE J. Solid-State Circuits"},{"key":"2387_CR38","doi-asserted-by":"crossref","unstructured":"Yugandhar, K., V. Ganesh Raja, M. Tejkumar, and D. Siva. \u201cHigh performance array multiplier using reversible logic structure.\u201d In 2018 international conference on current trends towards converging technologies (ICCTCT), pp. 1-5. IEEE, 2018","DOI":"10.1109\/ICCTCT.2018.8550872"},{"key":"2387_CR39","doi-asserted-by":"crossref","unstructured":"S. M. A. Zeinolabedin, F. M. Sch\u00fcffny, R. George, F. Kelber, H. Bauer, S. Scholze, S. H\u00e4nzsche et al. A 16-channel fully configurable neural SoC with 1.52$$\\mu $$ W\/Ch signal acquisition, 2.79$$\\mu $$ W\/Ch real-time spike classifier, and 1.79 TOPS\/W deep neural network accelerator in 22 nm FDSOI. IEEE Trans. Biomed. Circuits Syst. 16(1): 94-107 (2022)","DOI":"10.1109\/TBCAS.2022.3142987"},{"issue":"1","key":"2387_CR40","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/TC.2019.2936192","volume":"69","author":"H Zhang","year":"2019","unstructured":"H. Zhang, D. Chen, S.-B. Ko, New flexible multiple-precision multiply-accumulate unit for deep neural network training and inference. IEEE Trans. Comput. 69(1), 26\u201338 (2019)","journal-title":"IEEE Trans. Comput."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02387-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-023-02387-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02387-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T13:00:33Z","timestamp":1729429233000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-023-02387-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,16]]},"references-count":40,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2023,10]]}},"alternative-id":["2387"],"URL":"https:\/\/doi.org\/10.1007\/s00034-023-02387-2","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5,16]]},"assertion":[{"value":"1 August 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 April 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 April 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 May 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"There is no conflict of interest from the authors","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}