{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T21:34:20Z","timestamp":1767994460589,"version":"3.49.0"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T00:00:00Z","timestamp":1686182400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T00:00:00Z","timestamp":1686182400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s00034-023-02412-4","type":"journal-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T14:02:34Z","timestamp":1686232954000},"page":"6660-6683","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Performance-Driven LSTM Accelerator Hardware Using Split-Matrix-Based MVM"],"prefix":"10.1007","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2624-376X","authenticated-orcid":false,"given":"Tresa","family":"Joseph","sequence":"first","affiliation":[]},{"given":"T. S.","family":"Bindiya","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,6,8]]},"reference":[{"key":"2412_CR1","doi-asserted-by":"crossref","unstructured":"A. Alzahrani, N. Alalwan, M. Sarrab, Mobile cloud computing: advantage, disadvantage and open challenge, in Proceedings of the 7th Euro American Conference on Telematics and Information Systems (2014), pp. 1\u20134","DOI":"10.1145\/2590651.2590670"},{"issue":"4","key":"2412_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3529650","volume":"15","author":"A Arora","year":"2022","unstructured":"A. Arora, M. Ghosh, S. Mehta, V. Betz, L.K. John, Tensor slices: FPGA building blocks for the deep learning era. ACM Trans. Reconfigurable Technol. Syst. 15(4), 1\u201334 (2022)","journal-title":"ACM Trans. Reconfigurable Technol. Syst."},{"issue":"3","key":"2412_CR3","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1109\/TVLSI.2019.2947639","volume":"28","author":"E Bank-Tavakoli","year":"2020","unstructured":"E. Bank-Tavakoli, S.A. Ghasemzadeh, M. Kamal, A. Afzali-Kusha, M. Pedram, POLAR: a pipelined\/overlapped FPGA-based LSTM accelerator. IEEE Trans. Very Large Scale Integr. Syst. 28(3), 838\u2013842 (2020)","journal-title":"IEEE Trans. Very Large Scale Integr. Syst."},{"issue":"20","key":"2412_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/electronics10202470","volume":"10","author":"D Bhatt","year":"2021","unstructured":"D. Bhatt, C. Patel, H. Talsania, J. Patel, R. Vaghela, S. Pandya, K. Modi, H. Ghayvat, CNN variants for computer vision: history, architecture, application, challenges, and future scope. Electronics 10(20), 1\u201328 (2021)","journal-title":"Electronics"},{"issue":"6","key":"2412_CR5","doi-asserted-by":"publisher","first-page":"1803","DOI":"10.1063\/1.1144830","volume":"65","author":"M Bishop","year":"1994","unstructured":"M. Bishop, Neural networks and their applications. Rev. Sci. Instrum. 65(6), 1803\u20131832 (1994)","journal-title":"Rev. Sci. Instrum."},{"key":"2412_CR6","doi-asserted-by":"crossref","unstructured":"F. Conti, L. Cavigelli, G. Paulin, I. Susmelj, L. Benini, Chipmunk: a systolically scalable 0.9 mm 2, 3.08 Gop\/s\/mW@ 1.2 mW accelerator for near-sensor recurrent neural network inference, in 2018 IEEE Custom Integrated Circuits Conference (CICC) (2018), pp. 1\u20134","DOI":"10.1109\/CICC.2018.8357068"},{"key":"2412_CR7","unstructured":"Design Ware Building Block IP User Guide, Synposys, Inc., Mountain View, CA, USA, 06-SP2 (2012)"},{"key":"2412_CR8","unstructured":"C. Ding, S. Liao, Y. Wang, Z. Li, N. Liu, Y. Zhuo, C. Wang, X. Qian, Y. Bai, G. Yuan, X. Ma, Y. Zhang, J. Tang, Q. Qiu, X. Lin, B. Yuan, CIRCNN: accelerating and compressing deep neural networks using block-circulant weight matrices, in Proceedings of the Annual International Symposium on Micro-architecture (2017), pp. 395\u2013408"},{"key":"2412_CR9","doi-asserted-by":"crossref","unstructured":"C. Ding, A. Ren, G. Yuan, X. Ma, J. Li, N. Liu, B. Yuan, Y. Wang, Structured weight matrices-based hardware accelerators in deep neural networks: FPGAs and ASICs, in Proceedings of the ACM Great Lakes Symposium on VLSI (2018), pp. 353\u2013358","DOI":"10.1145\/3194554.3194625"},{"key":"2412_CR10","doi-asserted-by":"crossref","unstructured":"C. Ding, S. Wang, N. Liu, K. Xu, Y. Wang, Y. Liang, REQ-YOLO: a resource-aware, efficient quantization framework for object detection on FPGAS, in FPGA 2019\u2014Proceedings of the 2019 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (2019), pp. 33-42","DOI":"10.1145\/3289602.3293904"},{"key":"2412_CR11","doi-asserted-by":"publisher","first-page":"102701","DOI":"10.1016\/j.parco.2020.102701","volume":"100","author":"S Dong","year":"2020","unstructured":"S. Dong, P. Zhao, X. Lin, D. Kaeli, Exploring GPU acceleration of deep neural networks using block circulant matrices. Parallel Comput. 100, 102701 (2020)","journal-title":"Parallel Comput."},{"issue":"2","key":"2412_CR12","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1109\/JETCAS.2022.3170152","volume":"12","author":"A Garofalo","year":"2022","unstructured":"A. Garofalo, G. Ottavi, F. Conti, G. Karunaratne, I. Boybat, L. Benini, D. Rossi, A heterogeneous in-memory computing cluster for flexible end-to-end inference of real-world deep neural networks. IEEE J. Emerg. Sel. Top. Circuits Syst. 12(2), 422\u2013435 (2022)","journal-title":"IEEE J. Emerg. Sel. Top. Circuits Syst."},{"issue":"1","key":"2412_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3408062","volume":"26","author":"A Goel","year":"2021","unstructured":"A. Goel, S. Aghajanzadeh, C. Tung, S.H. Chen, G.K. Thiruvathukal, Y.H. Lu, Modular neural networks for low-power image classification on embedded devices. ACM Trans. Des. Autom. Electron. Syst. 26(1), 1\u201335 (2021)","journal-title":"ACM Trans. Des. Autom. Electron. Syst."},{"issue":"10","key":"2412_CR14","doi-asserted-by":"publisher","first-page":"2222","DOI":"10.1109\/TNNLS.2016.2582924","volume":"28","author":"K Greff","year":"2017","unstructured":"K. Greff, R.K. Srivastava, J. Koutnik, B.R. Steunebrink, J. Schmidhuber, LSTM: a search space odyssey. IEEE Trans. Neural Netw. Learn. Syst. 28(10), 2222\u20132232 (2017)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"2412_CR15","doi-asserted-by":"crossref","unstructured":"S. Han, X. Liu, H. Mao, J. Pu, A. Pedram, M.A. Horowitz, W.J. Dally, EIE: efficient inference engine on compressed deep neural network, in Proceedings on 43rd International Symposium on Computer Architecture (ISCA) (2016), vol. 16 (2016), pp. 243\u2013254","DOI":"10.1109\/ISCA.2016.30"},{"key":"2412_CR16","doi-asserted-by":"crossref","unstructured":"Y. He, J. Yue, Y. Liu, H. Yang, Block-circulant neural network accelerator featuring fine-grained frequency-domain quantization and reconfigurable FFT modules, in 2021 26th Asia and South Pacific Design Automation Conference (ASP-DAC). IEEE (2021), pp. 813\u2013818","DOI":"10.1145\/3394885.3431532"},{"issue":"6","key":"2412_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/electronics10060681","volume":"10","author":"D He","year":"2021","unstructured":"D. He, J. He, J. Liu, J. Yang, Q. Yan, Y. Yang, An FPGA-based LSTM acceleration engine for deep learning frameworks. Electronics 10(6), 1\u201315 (2021)","journal-title":"Electronics"},{"issue":"2","key":"2412_CR18","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1109\/TCSI.2018.2867291","volume":"66","author":"MT Khan","year":"2019","unstructured":"M.T. Khan, R.A. Shaik, Optimal complexity architectures for pipelined distributed arithmetic-based LMS adaptive filter. IEEE Trans. Circuits Syst I Regul. Pap. 66(2), 630\u2013642 (2019)","journal-title":"IEEE Trans. Circuits Syst I Regul. Pap."},{"key":"2412_CR19","doi-asserted-by":"crossref","unstructured":"S. Konwer, M. Sojan, P.A. Kenz, S.K. Santhosh, T. Joseph, T. Bindiya, Hardware realization of sigmoid and hyperbolic tangent activation functions, in 2022 IEEE International Conference on Industry 4.0, Artificial Intelligence, and Communications Technology (IAICT). IEEE (2022), pp. 84\u201389","DOI":"10.1109\/IAICT55358.2022.9887382"},{"issue":"4","key":"2412_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3448974","volume":"54","author":"VS Lalapura","year":"2021","unstructured":"V.S. Lalapura, J. Amudha, H.S. Satheesh, Recurrent neural networks for edge intelligence: a survey. ACM Comput. Surv. 54(4), 1\u201338 (2021)","journal-title":"ACM Comput. Surv."},{"key":"2412_CR21","unstructured":"Z. Li, S. Wang, C. Ding, Q. Qiu, Y. Wang, Y. Liang, Efficient recurrent neural networks using structured matrices in FPGAS, in 6th International Conference on Learning Representations (ICLR 2018)\u2014Workshop Track Proceedings (2018)"},{"issue":"1","key":"2412_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3275243","volume":"24","author":"J Li","year":"2019","unstructured":"J. Li, G. Yan, W. Lu, S. Gong, S. Jiang, J. Wu, X. Li, SynergyFlow: an elastic accelerator architecture supporting batch processing of large-scale deep neural networks. ACM Trans. Des. Autom. Electron. Syst. 24(1), 1\u201327 (2019)","journal-title":"ACM Trans. Des. Autom. Electron. Syst."},{"key":"2412_CR23","doi-asserted-by":"crossref","unstructured":"S. Liao, Z. Li, X. Lin, Q. Qiu, Y. Wang, B. Yuan, Energy-efficient, high-performance, highly-compressed deep neural network design using block-circulant matrices, in 2017 IEEE\/ACM International Conference on Computer-Aided Design (ICCAD). IEEE (2017), pp. 458\u2013465","DOI":"10.1109\/ICCAD.2017.8203813"},{"issue":"5","key":"2412_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3214304","volume":"51","author":"M Liu","year":"2019","unstructured":"M. Liu, Z. Xue, X. Xu, C. Zhong, J. Chen, Host-based intrusion detection system with system calls: review and future trends. ACM Comput. Surv. 51(5), 1\u201336 (2019)","journal-title":"ACM Comput. Surv."},{"issue":"8","key":"2412_CR25","doi-asserted-by":"publisher","first-page":"707","DOI":"10.1109\/TCSII.2006.877277","volume":"53","author":"PK Meher","year":"2006","unstructured":"P.K. Meher, Hardware-efficient systolization of DA-based calculation of finite digital convolution. IEEE Trans. Circuits Syst. II Express Briefs 53(8), 707\u2013711 (2006)","journal-title":"IEEE Trans. Circuits Syst. II Express Briefs"},{"issue":"2","key":"2412_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3439950","volume":"54","author":"G Pang","year":"2021","unstructured":"G. Pang, C. Shen, L. Cao, A.V.D. Hengel, Deep learning for anomaly detection: a review. ACM Comput. Surv. 54(2), 1\u201338 (2021)","journal-title":"ACM Comput. Surv."},{"issue":"6","key":"2412_CR27","first-page":"346","volume":"60","author":"SY Park","year":"2013","unstructured":"S.Y. Park, P.K. Meher, Adaptive FIR filter based on distributed arithmetic. IEEE Trans. Circuits Syst. II Express Briefs 60(6), 346\u2013350 (2013)","journal-title":"IEEE Trans. Circuits Syst. II Express Briefs"},{"issue":"7","key":"2412_CR28","first-page":"511","volume":"61","author":"SY Park","year":"2014","unstructured":"S.Y. Park, P.K. Meher, Efficient FPGA and ASIC realizations of a DA-based reconfigurable FIR digital filter. IEEE Trans. Circuits Syst. II Express Briefs 61(7), 511\u2013515 (2014)","journal-title":"IEEE Trans. Circuits Syst. II Express Briefs"},{"issue":"1","key":"2412_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/electronics8010078","volume":"8","author":"Z Qin","year":"2019","unstructured":"Z. Qin, D. Zhu, X. Zhu, X. Chen, Y. Shi, Y. Gao, Z. Lu, Q. Shen, L. Li, H. Pan, Accelerating deep neural networks by combining block-circulant matrices and low-precision weights. Electronics 8(1), 1\u201318 (2019)","journal-title":"Electronics"},{"issue":"2","key":"2412_CR30","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1109\/TVLSI.2021.3135353","volume":"30","author":"Z Que","year":"2021","unstructured":"Z. Que, H. Nakahara, E. Nurvitadhi, A. Boutros, H. Fan, C. Zeng, J. Meng, K.H. Tsoi, X. Niu, W. Luk, Recurrent neural networks with column-wise matrix-vector multiplication on FPGAs. IEEE Trans. Very Large Scale Integr. Syst. 30(2), 227\u2013237 (2021)","journal-title":"IEEE Trans. Very Large Scale Integr. Syst."},{"key":"2412_CR31","doi-asserted-by":"publisher","first-page":"57967","DOI":"10.1109\/ACCESS.2020.2982416","volume":"8","author":"NM Rezk","year":"2020","unstructured":"N.M. Rezk, M. Purnaprajna, T. Nordstrom, Z. Ul-Abdin, Recurrent neural networks: an embedded computing perspective. IEEE Access 8, 57967\u201357996 (2020)","journal-title":"IEEE Access"},{"issue":"3","key":"2412_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3532863","volume":"19","author":"M Soltaniyeh","year":"2022","unstructured":"M. Soltaniyeh, R.P. Martin, S. Nagarakatte, An accelerator for sparse convolutional neural networks leveraging systolic general matrix-matrix multiplication. ACM Trans. Archit. Code Optim. 19(3), 1\u201326 (2022)","journal-title":"ACM Trans. Archit. Code Optim."},{"key":"2412_CR33","doi-asserted-by":"crossref","unstructured":"Q.T. Truong, H.W. Lauw, Visual sentiment analysis for review images with item-oriented and user-oriented CNN, in Proceedings of 25th ACM international conference on Multimedia (2017), pp. 1274\u20131282","DOI":"10.1145\/3123266.3123374"},{"key":"2412_CR34","doi-asserted-by":"crossref","unstructured":"S. Wang, Z. Li, C. Ding, B. Yuan, Q. Qiu, Y. Wang, Y. Liang, C-LSTM: enabling efficient LSTM using structured compression techniques on FPGAs, in Proceedings of the 2018 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (2018), pp. 11\u201320","DOI":"10.1145\/3174243.3174253"},{"issue":"10","key":"2412_CR35","doi-asserted-by":"publisher","first-page":"2763","DOI":"10.1109\/TVLSI.2017.2717950","volume":"25","author":"Z Wang","year":"2017","unstructured":"Z. Wang, J. Lin, Z. Wang, Accelerating recurrent neural networks: a memory-efficient approach. IEEE Trans. Very Large Scale Integr. Syst. 25(10), 2763\u20132775 (2017)","journal-title":"IEEE Trans. Very Large Scale Integr. Syst."},{"issue":"2","key":"2412_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3214306","volume":"52","author":"E Wang","year":"2019","unstructured":"E. Wang, J.J. Davis, R. Zhao, X. Niu, W. Luk, P.Y. Cheung, Deep neural network approximation for custom hardware: where we\u2019ve been, where we\u2019re going. ACM Comput. Surv. (CSUR) 52(2), 1\u201339 (2019)","journal-title":"ACM Comput. Surv. (CSUR)"},{"issue":"2","key":"2412_CR37","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1109\/JETCAS.2019.2911739","volume":"9","author":"M Wang","year":"2019","unstructured":"M. Wang, Z. Wang, J. Lu, J. Lin, Z. Wang, E-LSTM: an efficient hardware architecture for long short-term memory. IEEE J. Emerg. Sel. Top. Circuits Syst. 9(2), 280\u2013291 (2019)","journal-title":"IEEE J. Emerg. Sel. Top. Circuits Syst."},{"key":"2412_CR38","doi-asserted-by":"crossref","unstructured":"C. Xiong, N. Xu, Performance comparison of BLAS on CPU, GPU and FPGA, in 2020 IEEE 9th Joint International Information Technology and Artificial Intelligence Conference (ITAIC), vol. 9. IEEE (2020), pp. 193\u2013197","DOI":"10.1109\/ITAIC49862.2020.9338793"},{"issue":"2","key":"2412_CR39","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1109\/TVLSI.2019.2941921","volume":"28","author":"KP Yalamarthy","year":"2020","unstructured":"K.P. Yalamarthy, S. Dhall, M.T. Khan, R.A. Shaik, Low-complexity distributed-arithmetic-based pipelined architecture for an LSTM network. IEEE Trans. Very Large Scale Integr. Syst. 28(2), 329\u2013338 (2020)","journal-title":"IEEE Trans. Very Large Scale Integr. Syst."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02412-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-023-02412-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02412-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,5]],"date-time":"2023-10-05T19:04:25Z","timestamp":1696532665000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-023-02412-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,8]]},"references-count":39,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["2412"],"URL":"https:\/\/doi.org\/10.1007\/s00034-023-02412-4","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6,8]]},"assertion":[{"value":"14 December 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 May 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 May 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 June 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}