{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,27]],"date-time":"2025-09-27T08:17:41Z","timestamp":1758961061292,"version":"3.37.3"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2020,9,7]],"date-time":"2020-09-07T00:00:00Z","timestamp":1599436800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,9,7]],"date-time":"2020-09-07T00:00:00Z","timestamp":1599436800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100008628","name":"Ministry of Electronics and Information technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100008628","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s00034-020-01534-3","type":"journal-article","created":{"date-parts":[[2020,9,7]],"date-time":"2020-09-07T03:15:21Z","timestamp":1599448521000},"page":"1542-1567","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["M2DA: A Low-Complex Design Methodology for Convolutional Neural Network Exploiting Data Symmetry and Redundancy"],"prefix":"10.1007","volume":"40","author":[{"given":"Madhuri","family":"Panwar","sequence":"first","affiliation":[]},{"given":"Nemani","family":"Sri 
Hari","sequence":"additional","affiliation":[]},{"given":"Dwaipayan","family":"Biswas","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5636-0676","authenticated-orcid":false,"given":"Amit","family":"Acharyya","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,7]]},"reference":[{"key":"1534_CR1","doi-asserted-by":"publisher","first-page":"1737","DOI":"10.1109\/TVLSI.2018.2825145","volume":"26","author":"T Abtahi","year":"2018","unstructured":"T. Abtahi, C. Shea, A. Kulkarni, T. Mohsenin, Accelerating convolutional neural network with FFT on embedded hardware. IEEE Trans. Very Large Scale Integr. (VLSI) Syst. 26, 1737\u20131749 (2018)","journal-title":"IEEE Trans. Very Large Scale Integr. (VLSI) Syst."},{"key":"1534_CR2","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1109\/TCSII.2009.2015386","volume":"56","author":"A Acharyya","year":"2009","unstructured":"A. Acharyya, K. Maharatna, B.M. Al-Hashimi, S.R. Gunn, Memory reduction methodology for distributed-arithmetic-based DWT\/IDWT exploiting data symmetry. IEEE Trans. Circuits Syst. II Express Briefs 56, 285\u2013289 (2009)","journal-title":"IEEE Trans. Circuits Syst. II Express Briefs"},{"key":"1534_CR3","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1109\/TCAD.2017.2682138","volume":"37","author":"R Andri","year":"2017","unstructured":"R. Andri, L. Cavigelli, D. Rossi, L. Benini, YodaNN: An architecture for ultralow power binary-weight CNN acceleration. IEEE Trans. Comput. Aided Des. Integr. Circuits Syst. 37, 48\u201360 (2017)","journal-title":"IEEE Trans. Comput. Aided Des. Integr. Circuits Syst."},{"key":"1534_CR4","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1109\/JSSC.2016.2616357","volume":"52","author":"Y-H Chen","year":"2016","unstructured":"Y.-H. Chen, T. Krishna, J.S. Emer, V. Sze, Eyeriss: an energy-efficient reconfigurable accelerator for deep convolutional neural networks. IEEE J. 
Solid-State Circuits 52, 127\u2013138 (2016)","journal-title":"IEEE J. Solid-State Circuits"},{"key":"1534_CR5","doi-asserted-by":"crossref","unstructured":"Y. Chen, T. Luo, S. Liu, S. Zhang, L. He, J. Wang, L. Li, T. Chen, Z. Xu, N. Sun, et al. Dadiannao: a machine-learning supercomputer. In 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture (IEEE, 2014), pp. 609\u2013622","DOI":"10.1109\/MICRO.2014.58"},{"key":"1534_CR6","doi-asserted-by":"crossref","unstructured":"W. Chen, J. Wilson, S. Tyree, K. Q. Weinberger, Y. Chen, Compressing convolutional neural networks in the frequency domain. In Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (2016), pp. 1475\u20131484","DOI":"10.1145\/2939672.2939839"},{"key":"1534_CR7","doi-asserted-by":"publisher","first-page":"292","DOI":"10.1109\/JETCAS.2019.2910232","volume":"9","author":"Y-H Chen","year":"2019","unstructured":"Y.-H. Chen, T.-J. Yang, J. Emer, V. Sze, Eyeriss v2: a flexible accelerator for emerging deep neural networks on mobile devices. IEEE J. Emerg. Sel. Top. Circuits Syst. 9, 292\u2013308 (2019)","journal-title":"IEEE J. Emerg. Sel. Top. Circuits Syst."},{"key":"1534_CR8","unstructured":"T. Chilimbi, Y. Suzue, J. Apacible, K. Kalyanaraman. Project adam: building an efficient and scalable deep learning training system. In 11th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 14) (2014), pp. 571\u2013582"},{"key":"1534_CR9","first-page":"2493","volume":"12","author":"R Collobert","year":"2011","unstructured":"R. Collobert, J. Weston, L. Bottou, M. Karlen, K. Kavukcuoglu, P. Kuksa, Natural language processing (almost) from scratch. J. Mach. Learn. Res. 12, 2493\u20132537 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"1534_CR10","first-page":"1","volume":"8","author":"A Gautam","year":"2020","unstructured":"A. Gautam, M. Panwar, D. Biswas, A. 
Acharyya, MyoNet: a transfer learning-based LRCN for lower limb movement recognition and knee joint angle prediction for remote monitoring of rehabilitation progress from sEMG. IEEE J. Transl. Eng. Health Med. 8, 1\u201310 (2020)","journal-title":"IEEE J. Transl. Eng. Health Med."},{"key":"1534_CR11","doi-asserted-by":"publisher","first-page":"602","DOI":"10.1016\/j.neunet.2005.06.042","volume":"18","author":"A Graves","year":"2005","unstructured":"A. Graves, J. Schmidhuber, Framewise phoneme classification with bidirectional LSTM and other neural network architectures. Neural netw. 18, 602\u2013610 (2005)","journal-title":"Neural netw."},{"key":"1534_CR12","unstructured":"A. Graves, G. Wayne, I. Danihelka. Neural turing machines. arXiv preprint arXiv:1410.5401"},{"key":"1534_CR13","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1145\/3007787.3001163","volume":"44","author":"S Han","year":"2016","unstructured":"S. Han, X. Liu, H. Mao, J. Pu, A. Pedram, M.A. Horowitz, W.J. Dally, EIE: efficient inference engine on compressed deep neural network. ACM SIGARCH Comput. Archit. News 44, 243\u2013254 (2016)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"1534_CR14","unstructured":"S. Han, H. Mao, W.J. Dally. Deep compression: compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149"},{"key":"1534_CR15","unstructured":"D. Huang, X. Zhang, R. Zhang, T. Zhi, D. He, J. Guo, C. Liu, Q. Guo, Z. Du, S. Liu et al. DWM: a decomposable winograd method for convolution acceleration. arXiv preprint arXiv:2002.00552"},{"key":"1534_CR16","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1109\/TCSI.2018.2848647","volume":"66","author":"A Jafari","year":"2018","unstructured":"A. Jafari, A. Ganesan, C.S.K. Thalisetty, V. Sivasubramanian, T. Oates, T. Mohsenin, Sensornet: a scalable and low-power deep convolutional neural network for multimodal data classification. IEEE Trans. Circuits Syst. 
I Regul. Pap. 66, 274\u2013287 (2018)","journal-title":"IEEE Trans. Circuits Syst. I Regul. Pap."},{"key":"1534_CR17","unstructured":"A. Krizhevsky, I. Sutskever, G.E. Hinton. Imagenet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems (2012), pp. 1097\u20131105"},{"key":"1534_CR18","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"Y. LeCun, Y. Bengio, G. Hinton, Deep learning. Nature 521, 436\u2013444 (2015)","journal-title":"Nature"},{"key":"1534_CR19","doi-asserted-by":"publisher","first-page":"1908","DOI":"10.1109\/TCSI.2018.2885953","volume":"66","author":"C-T Liu","year":"2018","unstructured":"C.-T. Liu, T.-W. Lin, Y.-H. Wu, Y.-S. Lin, H. Lee, Y. Tsao, S.-Y. Chien, Computation-performance optimization of convolutional neural networks with redundant filter removal. IEEE Trans. Circuits Syst. I Regul. Pap. 66, 1908\u20131921 (2018)","journal-title":"IEEE Trans. Circuits Syst. I Regul. Pap."},{"key":"1534_CR20","doi-asserted-by":"publisher","first-page":"1354","DOI":"10.1109\/TVLSI.2018.2815603","volume":"26","author":"Y Ma","year":"2018","unstructured":"Y. Ma, Y. Cao, S. Vrudhula, J. Seo, Optimizing the convolution operation to accelerate deep neural networks on FPGA. IEEE Trans. Very Large Scale Integr. (VLSI) Syst. 26, 1354\u20131367 (2018)","journal-title":"IEEE Trans. Very Large Scale Integr. (VLSI) Syst."},{"key":"1534_CR21","doi-asserted-by":"crossref","unstructured":"P. Meloni, G. Deriu, F. Conti, I. Loi, L. Raffo, L. Benini, A high-efficiency runtime reconfigurable IP for CNN acceleration on a mid-range all-programmable SoC. In 2016 International Conference on ReConFigurable Computing and FPGAs (ReConFig) (IEEE, 2016), pp. 1\u20138","DOI":"10.1109\/ReConFig.2016.7857144"},{"key":"1534_CR22","unstructured":"D. Miyashita, E.H. Lee, B. 
Murmann, Convolutional neural networks using logarithmic data representation. arXiv preprint arXiv:1603.01025"},{"key":"1534_CR23","doi-asserted-by":"publisher","first-page":"1861","DOI":"10.1109\/TVLSI.2019.2905242","volume":"27","author":"DT Nguyen","year":"2019","unstructured":"D.T. Nguyen, T.N. Nguyen, H. Kim, H.-J. Lee, A high-throughput and power-efficient FPGA implementation of YOLO CNN for object detection. IEEE Trans. Very Large Scale Integr. (VLSI) Syst. 27, 1861\u20131873 (2019)","journal-title":"IEEE Trans. Very Large Scale Integr. (VLSI) Syst."},{"key":"1534_CR24","first-page":"1","volume":"2","author":"K Ovtcharov","year":"2015","unstructured":"K. Ovtcharov, O. Ruwase, J.-Y. Kim, J. Fowers, K. Strauss, E.S. Chung, Accelerating deep convolutional neural networks using specialized hardware. Microsoft Res. Whitepaper 2, 1\u20134 (2015)","journal-title":"Microsoft Res. Whitepaper"},{"key":"1534_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3005448","volume":"13","author":"A Page","year":"2017","unstructured":"A. Page, A. Jafari, C. Shea, T. Mohsenin, Sparcnet: a hardware accelerator for efficient deployment of sparse convolutional networks. ACM J. Emerg. Technol. Comput. Syst. (JETC) 13, 1\u201332 (2017)","journal-title":"ACM J. Emerg. Technol. Comput. Syst. (JETC)"},{"key":"1534_CR26","doi-asserted-by":"crossref","unstructured":"M. Panwar, S.R. Dyuthi, K.C. Prakash, D. Biswas, A. Acharyya, K. Maharatna, A. Gautam, G.R. Naik, CNN based approach for activity recognition using a wrist-worn accelerometer. In 2017 39th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC) (IEEE, 2017), pp. 2438\u20132441","DOI":"10.1109\/EMBC.2017.8037349"},{"key":"1534_CR27","doi-asserted-by":"crossref","unstructured":"M. Panwar, J. Padmini, A. Acharyya, D. Biswas, et al., Modified distributed arithmetic based low complexity CNN architecture design methodology. 
In 2017 European Conference on Circuit Theory and Design (ECCTD) (IEEE, 2017), pp. 1\u20134","DOI":"10.1109\/ECCTD.2017.8093254"},{"key":"1534_CR28","doi-asserted-by":"publisher","first-page":"3026","DOI":"10.1109\/TBME.2019.2899927","volume":"66","author":"M Panwar","year":"2019","unstructured":"M. Panwar, D. Biswas, H. Bajaj, M. J\u00f6bges, R. Turk, K. Maharatna, A. Acharyya, Rehab-net: deep learning framework for arm movement classification using wearable sensors for stroke rehabilitation. IEEE Trans. Biomed. Eng. 66, 3026\u20133037 (2019)","journal-title":"IEEE Trans. Biomed. Eng."},{"key":"1534_CR29","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1109\/TCSI.2019.2948791","volume":"67","author":"D Ray","year":"2019","unstructured":"D. Ray, N.V. George, P.K. Meher, An analytical framework and approximation strategy for efficient implementation of distributed arithmetic-based inner-product architectures. IEEE Trans. Circuits Syst. I Regul. Pap. 67, 212\u2013224 (2019)","journal-title":"IEEE Trans. Circuits Syst. I Regul. Pap."},{"key":"1534_CR30","doi-asserted-by":"publisher","first-page":"796","DOI":"10.1109\/JETCAS.2018.2835809","volume":"8","author":"SS Sarwar","year":"2018","unstructured":"S.S. Sarwar, G. Srinivasan, B. Han, P. Wijesinghe, A. Jaiswal, P. Panda, A. Raghunathan, K. Roy, Energy efficient neural computing: a study of cross-layer approximations. IEEE J. Emerg. Sel. Top. Circuits Syst. 8, 796\u2013809 (2018)","journal-title":"IEEE J. Emerg. Sel. Top. Circuits Syst."},{"key":"1534_CR31","doi-asserted-by":"crossref","unstructured":"Y. Shen, M. Ferdman, P. Milder, Maximizing CNN accelerator efficiency through resource partitioning. In 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA) (IEEE, 2017), pp. 
535\u2013547","DOI":"10.1145\/3079856.3080221"},{"key":"1534_CR32","doi-asserted-by":"publisher","first-page":"102991","DOI":"10.1016\/j.micpro.2020.102991","volume":"73","author":"RJ Struharik","year":"2020","unstructured":"R.J. Struharik, B.Z. Vukobratovi\u0107, A.M. Erdeljan, D.M. Rakanovi\u0107, CoNNa\u2013hardware accelerator for compressed convolutional neural networks. Microprocess. Microsyst. 73, 102991 (2020)","journal-title":"Microprocess. Microsyst."},{"key":"1534_CR33","doi-asserted-by":"crossref","unstructured":"N. Suda, V. Chandra, G. Dasika, A. Mohanty, Y. Ma, S. Vrudhula, J. Seo, Y. Cao. Throughput-optimized OpenCL-based FPGA accelerator for largescale convolutional neural networks. In Proceedings of the 2016 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (2016), pp. 16\u201325","DOI":"10.1145\/2847263.2847276"},{"key":"1534_CR34","doi-asserted-by":"publisher","first-page":"2220","DOI":"10.1109\/TVLSI.2017.2688340","volume":"25","author":"F Tu","year":"2017","unstructured":"F. Tu, S. Yin, P. Ouyang, S. Tang, L. Liu, S. Wei, Deep convolutional neural network architecture with reconfigurable computation patterns. IEEE Trans. Very Large Scale Integr. (VLSI) Syst. 25, 2220\u20132233 (2017)","journal-title":"IEEE Trans. Very Large Scale Integr. (VLSI) Syst."},{"key":"1534_CR35","first-page":"513","volume":"36","author":"C Wang","year":"2016","unstructured":"C. Wang, L. Gong, Q. Yu, X. Li, Y. Xie, X. Zhou, DLAU: a scalable deep learning accelerator unit on FPGA. IEEE Trans. Comput. Aided Des. Integr. Circuits Syst. 36, 513\u2013517 (2016)","journal-title":"IEEE Trans. Comput. Aided Des. Integr. Circuits Syst."},{"key":"1534_CR36","doi-asserted-by":"publisher","first-page":"1941","DOI":"10.1109\/TCSI.2017.2767204","volume":"65","author":"J Wang","year":"2017","unstructured":"J. Wang, J. Lin, Z. Wang, Efficient hardware architectures for deep convolutional neural network. IEEE Trans. Circuits Syst. I Regul. Pap. 
65, 1941\u20131953 (2017)","journal-title":"IEEE Trans. Circuits Syst. I Regul. Pap."},{"key":"1534_CR37","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1109\/TVLSI.2017.2767624","volume":"26","author":"Y Wang","year":"2017","unstructured":"Y. Wang, J. Lin, Z. Wang, An energy-efficient architecture for binary weight convolutional neural networks. IEEE Trans. Very Large Scale Integr. (VLSI) Syst. 26, 280\u2013293 (2017)","journal-title":"IEEE Trans. Very Large Scale Integr. (VLSI) Syst."},{"key":"1534_CR38","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1109\/TCSI.2018.2856624","volume":"66","author":"Y Wang","year":"2018","unstructured":"Y. Wang, J. Lin, Z. Wang, Fpap: a folded architecture for energy-quality scalable convolutional neural networks. IEEE Trans. Circuits Syst. I Regul. Pap. 66, 288\u2013301 (2018)","journal-title":"IEEE Trans. Circuits Syst. I Regul. Pap."},{"key":"1534_CR39","doi-asserted-by":"crossref","unstructured":"S. Wang, D. Zhou, X. Han, T. Yoshimura, Chain-NN: an energy-efficient 1D chain architecture for accelerating deep convolutional neural networks. In Design, Automation & Test in Europe Conference & Exhibition (DATE) (IEEE, 2017), pp. 1032\u20131037","DOI":"10.23919\/DATE.2017.7927142"},{"key":"1534_CR40","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/53.29648","volume":"6","author":"SA White","year":"1989","unstructured":"S.A. White, Applications of distributed arithmetic to digital signal processing: a tutorial review. IEEE ASSP Mag. 6, 4\u201319 (1989)","journal-title":"IEEE ASSP Mag."},{"key":"1534_CR41","unstructured":"C. Wu, M. Wang, X. Chu, K. Wang, and L. He. Low Precision Floating-point Arithmetic for High Performance FPGA-based CNN Acceleration. arXiv preprint arXiv:2003.03852"},{"key":"1534_CR42","doi-asserted-by":"crossref","unstructured":"C. Zhang, P. Li, G. Sun, Y. Guan, B. Xiao, J. Cong, Optimizing FPGA-based accelerator design for deep convolutional neural networks. 
In Proceedings of the 2015 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (2015), pp. 161\u2013170","DOI":"10.1145\/2684746.2689060"},{"key":"1534_CR43","doi-asserted-by":"crossref","unstructured":"R. Zhao, W. Song, W. Zhang, T. Xing, J.-H. Lin, M. Srivastava, R. Gupta, Z. Zhang. Accelerating binarized convolutional neural networks with software programmable fpgas. In Proceedings of the 2017 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays (2017), pp. 15\u201324","DOI":"10.1145\/3020078.3021741"},{"key":"1534_CR44","unstructured":"C. Zhu, K. Huang, S. Yang, Z. Zhu, H. Zhang, H. Shen, An efficient hardware accelerator for structured sparse convolutional neural networks on FPGAs. arXiv preprint arXiv:2001.01955"},{"key":"1534_CR45","doi-asserted-by":"crossref","unstructured":"J. Zhu, Z. Qian, C.-Y. Tsui. BHNN: a memory-efficient accelerator for compressing deep neural networks with blocked hashing techniques. In 2017 22nd Asia and South Pacific Design Automation Conference (ASP-DAC) (IEEE, 2017), pp. 
690\u2013695","DOI":"10.1109\/ASPDAC.2017.7858404"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-020-01534-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-020-01534-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-020-01534-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,6]],"date-time":"2021-09-06T23:48:27Z","timestamp":1630972107000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-020-01534-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,7]]},"references-count":45,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["1534"],"URL":"https:\/\/doi.org\/10.1007\/s00034-020-01534-3","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2020,9,7]]},"assertion":[{"value":"14 January 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 August 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 September 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}