{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T05:47:50Z","timestamp":1749793670655,"version":"3.37.3"},"reference-count":101,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T00:00:00Z","timestamp":1641945600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T00:00:00Z","timestamp":1641945600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100014718","name":"Innovative Research Group Project of the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61836010"],"award-info":[{"award-number":["61836010"]}],"id":[{"id":"10.13039\/100014718","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Des Autom Embed Syst"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s10617-021-09256-8","type":"journal-article","created":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T00:02:41Z","timestamp":1641945761000},"page":"1-27","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["New paradigm of FPGA-based computational intelligence from surveying the implementation of DNN accelerators"],"prefix":"10.1007","volume":"26","author":[{"given":"Yang","family":"You","sequence":"first","affiliation":[]},{"given":"Yinghui","family":"Chang","sequence":"additional","affiliation":[]},{"given":"Weikang","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Bingrui","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Hongyin","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Xiaojie","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Bijing","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Kairong","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Shan","family":"He","sequence":"additional","affiliation":[]},{"given":"Lin","family":"Li","sequence":"additional","affiliation":[]},{"given":"Donghui","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,12]]},"reference":[{"key":"9256_CR1","unstructured":"Abadi M, Agarwal A, Barham P, Brevdo E, Chen Z, Citro C, Corrado GS, Davis A, Dean J, Devin M (2016) Tensorflow: large-scale machine learning on heterogeneous distributed systems. arXiv preprint arXiv:1603.04467"},{"issue":"3S","key":"9256_CR2","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1145\/2872887.2750397","volume":"43","author":"B Akin","year":"2015","unstructured":"Akin B, Franchetti F, Hoe JC (2015) Data reorganization in memory using 3D-stacked dram. ACM SIGARCH Comput Archit News 43(3S):131\u2013143","journal-title":"ACM SIGARCH Comput Archit News"},{"issue":"4","key":"9256_CR3","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1109\/Tcsvt.2008.918775","volume":"18","author":"A Beric","year":"2008","unstructured":"Beric A, van Meerbergen J, de Haan G, Sethuraman R (2008) Memory-centric video processing. IEEE Trans Circuits Syst Video Technol 18(4):439\u2013452. https:\/\/doi.org\/10.1109\/Tcsvt.2008.918775","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"9256_CR4","doi-asserted-by":"publisher","unstructured":"Beyls K, D\u2019Hollander EH (2009) Refactoring for data locality. Computer 42(2):62\u201371. https:\/\/doi.org\/10.1109\/Mc.2009.57","DOI":"10.1109\/Mc.2009.57"},{"key":"9256_CR5","doi-asserted-by":"crossref","unstructured":"Boo Y, Sung W (2017) Structured sparse ternary weight coding of deep neural networks for efficient hardware implementations. In: 2017 IEEE international workshop on signal processing systems (SIPS)","DOI":"10.1109\/SiPS.2017.8110021"},{"key":"9256_CR6","doi-asserted-by":"publisher","unstructured":"Cadambi S, Durdanovic I, Jakkula V, Sankaradass M, Cosatto E, Chakradhar S, Graf HP (2009) A massively parallel FPGA-based coprocessor for support vector machines. In: Proceedings of the 2009 17th IEEE symposium on field programmable custom computing machines, pp 115\u2013122. https:\/\/doi.org\/10.1109\/Fccm.2009.34","DOI":"10.1109\/Fccm.2009.34"},{"key":"9256_CR7","doi-asserted-by":"publisher","first-page":"2273","DOI":"10.1109\/Access.2018.2886876","volume":"7","author":"J Chang","year":"2019","unstructured":"Chang J, Sha J (2019) Prune deep neural networks with the modified l-1\/2 penalty. IEEE Access 7:2273\u20132280. https:\/\/doi.org\/10.1109\/Access.2018.2886876","journal-title":"IEEE Access"},{"key":"9256_CR8","doi-asserted-by":"publisher","unstructured":"Chang YJ, Tsai KL, Cheng YC (2020) Data retention based low leakage power TCAM for network packet routing. IEEE Trans Circuits Syst II Express Briefs 1. https:\/\/doi.org\/10.1109\/TCSII.2020.3014154","DOI":"10.1109\/TCSII.2020.3014154"},{"issue":"12","key":"9256_CR9","doi-asserted-by":"publisher","first-page":"2896","DOI":"10.1109\/TPDS.2020.3006238","volume":"31","author":"G Chen","year":"2020","unstructured":"Chen G, Meng H, Liang Y, Huang K (2020) GPU-accelerated real-time stereo estimation with binary neural network. IEEE Trans Parallel Distrib Syst 31(12):2896\u20132907","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"9256_CR10","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1145\/2644865.2541967","volume":"49","author":"T Chen","year":"2014","unstructured":"Chen T, Du Z, Sun N, Wang J, Wu C, Chen Y, Temam O (2014) Diannao: a small-footprint high-throughput accelerator for ubiquitous machine-learning. ACM SIGPLAN Not 49:269\u2013284","journal-title":"ACM SIGPLAN Not"},{"issue":"11","key":"9256_CR11","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1145\/2996864","volume":"59","author":"Y Chen","year":"2016","unstructured":"Chen Y, Chen T, Xu Z, Sun N, Temam O (2016) Diannao family. Commun ACM 59(11):105\u2013112. https:\/\/doi.org\/10.1145\/2996864","journal-title":"Commun ACM"},{"key":"9256_CR12","doi-asserted-by":"crossref","unstructured":"Chen Y, Luo T, Liu S, Zhang S, He L, Wang J, Li L, Chen T, Xu Z, Sun N et al (2014) Dadiannao: a machine-learning supercomputer. In: Proceedings of the 47th annual IEEE\/ACM international symposium on microarchitecture, pp 609\u2013622. IEEE Computer Society","DOI":"10.1109\/MICRO.2014.58"},{"issue":"1","key":"9256_CR13","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1109\/jssc.2016.2616357","volume":"52","author":"YH Chen","year":"2017","unstructured":"Chen YH, Krishna T, Emer JS, Sze V (2017) Eyeriss: an energy-efficient reconfigurable accelerator for deep convolutional neural networks. IEEE J Solid-State Circuits 52(1):127\u2013138. https:\/\/doi.org\/10.1109\/jssc.2016.2616357","journal-title":"IEEE J Solid-State Circuits"},{"issue":"10","key":"9256_CR14","doi-asserted-by":"publisher","first-page":"4730","DOI":"10.1109\/TNNLS.2017.2774288","volume":"29","author":"J Cheng","year":"2018","unstructured":"Cheng J, Wu J, Leng C, Wang Y, Hu Q (2018) Quantized CNN: a unified approach to accelerate and compress convolutional networks. IEEE Trans Neural Netw Learn Syst 29(10):4730\u20134743. https:\/\/doi.org\/10.1109\/TNNLS.2017.2774288","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9256_CR15","doi-asserted-by":"crossref","unstructured":"Chi P, Li S, Xu C, Zhang T, Zhao J, Liu Y, Wang Y, Xie Y (2016) Prime: a novel processing-in-memory architecture for neural network computation in reram-based main memory. In: 2016 ACM\/IEEE 43rd annual international symposium on computer architecture (ISCA), vol 3, pp 27\u201339. IEEE Press","DOI":"10.1145\/3007787.3001140"},{"key":"9256_CR16","doi-asserted-by":"crossref","unstructured":"Cloutier J, Cosatto E, Pigeon S, Boyer FR, Simard PY (1996) VIP: an FPGA-based processor for image processing and neural networks. In: Proceedings of fifth international conference on microelectronics for neural networks, pp 330\u2013336. IEEE","DOI":"10.1109\/MNNFS.1996.493811"},{"key":"9256_CR17","doi-asserted-by":"crossref","unstructured":"Deng L, Li J, Huang JT, Yao K, Yu D, Seide F, Seltzer M, Zweig G, He X, Williams J (2013) Recent advances in deep learning for speech research at Microsoft. In: 2013 IEEE international conference on acoustics, speech and signal processing, pp 8604\u20138608. IEEE","DOI":"10.1109\/ICASSP.2013.6639345"},{"key":"9256_CR18","doi-asserted-by":"crossref","unstructured":"Dong H, Jiang L, Li TJ, Liang XY (2018) A systematic FPGA acceleration design for applications based on convolutional neural networks. In: Advances in materials, machinery, electronics II, vol 1955","DOI":"10.1063\/1.5033781"},{"key":"9256_CR19","doi-asserted-by":"publisher","unstructured":"Du ZD, Fasthuber R, Chen TS, Ienne P, Li L, Luo T, Feng, XB, Chen YJ, Temam O (2015) Shidiannao: shifting vision processing closer to the sensor. In: 2015 ACM\/IEEE 42nd annual international symposium on computer architecture (ISCA), pp 92\u2013104. https:\/\/doi.org\/10.1145\/2749469.2750389","DOI":"10.1145\/2749469.2750389"},{"issue":"7639","key":"9256_CR20","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1038\/nature21056","volume":"542","author":"A Esteva","year":"2017","unstructured":"Esteva A, Kuprel B, Novoa RA, Ko J, Swetter SM, Blau HM, Thrun S (2017) Dermatologist-level classification of skin cancer with deep neural networks. Nature 542(7639):115\u2013118. https:\/\/doi.org\/10.1038\/nature21056","journal-title":"Nature"},{"key":"9256_CR21","doi-asserted-by":"publisher","unstructured":"Farabet C, Poulet C, Han JY, Lecun Y (2009) CNP: an FPGA-based processor for convolutional networks. In: FPL: 2009 international conference on field programmable logic and applications p 32. https:\/\/doi.org\/10.1109\/Fpl.2009.5272559","DOI":"10.1109\/Fpl.2009.5272559"},{"issue":"1","key":"9256_CR22","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/LCA.2014.2333735","volume":"14","author":"A Farmahini-Farahani","year":"2014","unstructured":"Farmahini-Farahani A, Ahn JH, Morrow K, Kim NS (2014) Drama: an architecture for accelerated processing near memory. IEEE Comput Archit Lett 14(1):26\u201329","journal-title":"IEEE Comput Archit Lett"},{"key":"9256_CR23","doi-asserted-by":"crossref","unstructured":"Finker R, del Campo I, Echanobe J, Doctor F (2013) Multilevel adaptive neural network architecture for implementing single-chip intelligent agents on FPGAs. In: 2013 international joint conference on neural networks (IJCNN)","DOI":"10.1109\/IJCNN.2013.6706760"},{"key":"9256_CR24","doi-asserted-by":"crossref","unstructured":"Foucher C, Muller F, Giulieri A (2012) Fast integration of hardware accelerators for dynamically reconfigurable architecture. In: 2012 7th international workshop on reconfigurable and communication-centric systems-on-chip (RECOSOC)","DOI":"10.1109\/ReCoSoC.2012.6322902"},{"issue":"9","key":"9256_CR25","doi-asserted-by":"publisher","first-page":"4224","DOI":"10.1109\/Tii.2018.2822828","volume":"14","author":"HB Gao","year":"2018","unstructured":"Gao HB, Cheng B, Wang JQ, Li KQ, Zhao JH, Li DY (2018) Object classification using CNN-based fusion of vision and lidar in autonomous vehicle environment. IEEE Trans Ind Inform 14(9):4224\u20134231. https:\/\/doi.org\/10.1109\/Tii.2018.2822828","journal-title":"IEEE Trans Ind Inform"},{"key":"9256_CR26","doi-asserted-by":"crossref","unstructured":"Geng T, Wang T, Sanaullah A, Yang C, Patel R, Herbordt M (2018) A framework for acceleration of CNN training on deeply-pipelined FPGA clusters with work and weight load balancing. In: 2018 28th international conference on field programmable logic and applications (FPL), pp 394\u20133944","DOI":"10.1109\/FPL.2018.00074"},{"key":"9256_CR27","doi-asserted-by":"crossref","unstructured":"Gokhale V, Jin J, Dundar A, Martini B, Culurciello E (2014) A 240 g-ops\/s mobile coprocessor for deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 682\u2013687","DOI":"10.1109\/CVPRW.2014.106"},{"key":"9256_CR28","doi-asserted-by":"crossref","unstructured":"Graves A, Mohamed AR, Hinton G (2013) Speech recognition with deep recurrent neural networks. In: 2013 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 6645\u20136649","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"9256_CR29","doi-asserted-by":"publisher","first-page":"767","DOI":"10.1007\/978-981-10-3229-5","volume":"423","author":"HN Guo","year":"2018","unstructured":"Guo HN, Ren XD, Li SH (2018) A new pruning method to train deep neural networks. Commun Signal Process Syst 423:767\u2013775. https:\/\/doi.org\/10.1007\/978-981-10-3229-5","journal-title":"Commun Signal Process Syst"},{"issue":"1","key":"9256_CR30","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1109\/tcad.2017.2705069","volume":"37","author":"K Guo","year":"2018","unstructured":"Guo K, Sui L, Qiu J, Yu J, Wang J, Yao S, Han S, Wang Y, Yang H (2018) Angel-eye: a complete design flow for mapping CNN onto embedded FPGA. IEEE Trans Comput Aided Des Integr Circuits Syst 37(1):35\u201347. https:\/\/doi.org\/10.1109\/tcad.2017.2705069","journal-title":"IEEE Trans Comput Aided Des Integr Circuits Syst"},{"key":"9256_CR31","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/j.neucom.2018.04.077","volume":"308","author":"Z Hajduk","year":"2018","unstructured":"Hajduk Z (2018) Reconfigurable FPGA implementation of neural networks. Neurocomputing 308:227\u2013234. https:\/\/doi.org\/10.1016\/j.neucom.2018.04.077","journal-title":"Neurocomputing"},{"key":"9256_CR32","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/j.image.2018.07.007","volume":"68","author":"A HajiRassouliha","year":"2018","unstructured":"HajiRassouliha A, Taberner AJ, Nash MP, Nielsen PMF (2018) Suitability of recent hardware accelerators (DSPs, FPGAs, and GPUs) for computer vision and image processing algorithms. Signal Process Image Commun 68:101\u2013119. https:\/\/doi.org\/10.1016\/j.image.2018.07.007","journal-title":"Signal Process Image Commun"},{"key":"9256_CR33","doi-asserted-by":"publisher","unstructured":"Han S, Kang JL, Mao HZ, Hu YM, Li X, Li YB, Xie DL, Luo H, Yao S, Wang Y, Yang HZ, Dally WJ (2017) ESE: efficient speech recognition engine with sparse LSTM on FPGA. In: FPGA\u201917: proceedings of the 2017 ACM\/SIGDA international symposium on field-programmable gate arrays, pp 75\u201384. https:\/\/doi.org\/10.1145\/3020078.3021745","DOI":"10.1145\/3020078.3021745"},{"key":"9256_CR34","doi-asserted-by":"publisher","unstructured":"Han S, Liu X, Mao H, Pu J, Pedram A, Horowitz M, Dally W (2016) EIE: efficient inference engine on compressed deep neural network. 2016 ACM\/IEEE 43rd annual international symposium on computer architecture (ISCA). pp 243\u2013254. https:\/\/doi.org\/10.1109\/ISCA.2016.30","DOI":"10.1109\/ISCA.2016.30"},{"key":"9256_CR35","unstructured":"Han S, Pool J, Tran J, Dally WJ (2015) Learning both weights and connections for efficient neural networks. In: Advances in neural information processing systems 28 (NIPS 2015), vol 28"},{"key":"9256_CR36","unstructured":"Hennessy JL, Patterson DA (2018) A new golden age for computer architecture: domain-specific hardware\/software co-design, enhanced security, open instruction sets, and agile chip development. Turing lecture at international symposium on computer architecture (ISCA\u201918), Los Angles, USA"},{"key":"9256_CR37","doi-asserted-by":"crossref","unstructured":"Horowitz M (2014) Computing\u2019s energy problem (and what we can do about it). In: 2014 IEEE international solid-state circuits conference digest of technical papers (ISSCC), vol 57, pp 10\u201314","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"9256_CR38","doi-asserted-by":"crossref","unstructured":"Hsien-De\u00a0Huang T, Yu CM, Kao HY (2017) Data-driven and deep learning methodology for deceptive advertising and phone scams detection. In: 2017 conference on technologies and applications of artificial intelligence (TAAI), pp 166\u2013171","DOI":"10.1109\/TAAI.2017.30"},{"key":"9256_CR39","doi-asserted-by":"publisher","first-page":"96060","DOI":"10.1109\/ACCESS.2019.2927108","volume":"7","author":"M Irfan","year":"2019","unstructured":"Irfan M, Ullah Z, Cheung RCC (2019) D-TCAM: a high-performance distributed RAM based TCAM architecture on FPGAs. IEEE Access 7:96060\u201396069","journal-title":"IEEE Access"},{"issue":"4","key":"9256_CR40","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1007\/s10462-012-9321-7","volume":"41","author":"N Izeboudjen","year":"2012","unstructured":"Izeboudjen N, Larbes C, Farah A (2012) A new classification approach for neural networks hardware: from standards chips to embedded systems on chip. Artif Intell Rev 41(4):491\u2013534. https:\/\/doi.org\/10.1007\/s10462-012-9321-7","journal-title":"Artif Intell Rev"},{"key":"9256_CR41","doi-asserted-by":"publisher","unstructured":"Jaki Z, Cadenelli N, Prats DB, Polo J, Perez DC (2019) A highly parameterizable framework for conditional restricted Boltzmann machine based workloads accelerated with FPGAs and OPENCL. Future Gener Comput Syst 104:201\u2013211. https:\/\/doi.org\/10.1016\/j.future.2019.10.025","DOI":"10.1016\/j.future.2019.10.025"},{"key":"9256_CR42","doi-asserted-by":"crossref","unstructured":"Jia Y, Shelhamer E, Donahue J, Karayev S, Long J, Girshick R, Guadarrama S, Darrell T (2014) Caffe: convolutional architecture for fast feature embedding. In: MM 2014\u2014proceedings of the 2014 ACM conference on multimedia","DOI":"10.1145\/2647868.2654889"},{"key":"9256_CR43","doi-asserted-by":"publisher","first-page":"101775","DOI":"10.1016\/j.sysarc.2020.101775","volume":"110","author":"W Jiang","year":"2020","unstructured":"Jiang W, Song Z, Zhan J, He Z, Jiang K (2020) Optimized co-scheduling of mixed-precision neural network accelerator for real-time multitasking applications. J Syst Archit 110:101775","journal-title":"J Syst Archit"},{"issue":"2","key":"9256_CR44","doi-asserted-by":"publisher","first-page":"352","DOI":"10.1109\/Tpami.2017.2670560","volume":"40","author":"YG Jiang","year":"2018","unstructured":"Jiang YG, Wu ZX, Wang J, Xue XY, Chang SF (2018) Exploiting feature and class relationships in video categorization with regularized deep neural networks. IEEE Trans Pattern Anal Mach Intell 40(2):352\u2013364. https:\/\/doi.org\/10.1109\/Tpami.2017.2670560","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9256_CR45","doi-asserted-by":"crossref","unstructured":"Jiao L, Luo C, Cao W, Zhou X, Wang L (2017) Accelerating low bit-width convolutional neural networks with embedded FPGA. In: Santambrogio M, Gohringer D, Stroobandt D, Mentens N, Nurmi J (eds) 2017 27th international conference on field programmable logic and applications (FPL), pp 1\u20134","DOI":"10.23919\/FPL.2017.8056820"},{"key":"9256_CR46","unstructured":"Jouppi NP, Young C, Patil N, Patterson D, Agrawal G, Bajwa R, Bates S, Bhatia S, Boden N, Borchers A et\u00a0al (2017) In-datacenter performance analysis of a tensor processing unit. In: 2017 ACM\/IEEE 44th annual international symposium on computer architecture (ISCA), pp 1\u201312. IEEE"},{"issue":"6","key":"9256_CR47","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun ACM"},{"key":"9256_CR48","doi-asserted-by":"crossref","unstructured":"Kwon H, Samajdar A, Krishna T (2018) Maeri: enabling flexible dataflow mapping over DNN accelerators via programmable interconnects. In: Proceedings of the 23rd international conference on architectural support for programming languages and operating systems, pp 461\u2013475","DOI":"10.1145\/3296957.3173176"},{"key":"9256_CR49","doi-asserted-by":"publisher","unstructured":"Lebedev V, Lempitsky V (2016) Fast convnets using group-wise brain damage. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 2554\u20132564. https:\/\/doi.org\/10.1109\/Cvpr.2016.280","DOI":"10.1109\/Cvpr.2016.280"},{"issue":"7553","key":"9256_CR50","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521(7553):436\u201344. https:\/\/doi.org\/10.1038\/nature14539","journal-title":"Nature"},{"key":"9256_CR51","unstructured":"LeCun Y, Denker JS, Solla SA (2000) Optimal brain damage. In: Advances in neural information processing systems, vol 2, pp 598\u2013605"},{"key":"9256_CR52","doi-asserted-by":"publisher","unstructured":"Li HM, Fan XT, Jiao L, Cao W, Zhou XG, Wang LL (2016) A high performance FPGA-based accelerator for large-scale convolutional neural networks. In: 2016 26th international conference on field programmable logic and applications (FPL). https:\/\/doi.org\/10.1109\/Fpl.2016.7577308","DOI":"10.1109\/Fpl.2016.7577308"},{"issue":"4","key":"9256_CR53","doi-asserted-by":"publisher","first-page":"1203","DOI":"10.1587\/transinf.2017EDL8248","volume":"E101d","author":"LQ Li","year":"2018","unstructured":"Li LQ, Xu YH, Zhu J (2018) Filter level pruning based on similar feature extraction for convolutional neural networks. IEICE Trans Inf Syst E101d(4):1203\u20131206. https:\/\/doi.org\/10.1587\/transinf.2017EDL8248","journal-title":"IEICE Trans Inf Syst"},{"key":"9256_CR54","doi-asserted-by":"crossref","unstructured":"Li X, Cai Y, Han J, Zeng X (2017) A high utilization FPGA-based accelerator for variable-scale convolutional neural network. In: 2017 IEEE 12th international conference on ASIC (ASICON), pp 944\u2013947. IEEE","DOI":"10.1109\/ASICON.2017.8252633"},{"key":"9256_CR55","doi-asserted-by":"publisher","first-page":"1072","DOI":"10.1016\/j.neucom.2017.09.046","volume":"275","author":"S Liang","year":"2018","unstructured":"Liang S, Yin S, Liu L, Luk W, Wei S (2018) FP-BNN: binarized neural network on FPGA. Neurocomputing 275:1072\u20131086. https:\/\/doi.org\/10.1016\/j.neucom.2017.09.046","journal-title":"Neurocomputing"},{"issue":"99","key":"9256_CR56","first-page":"1","volume":"PP","author":"W Liu","year":"2020","unstructured":"Liu W, Lin J, Wang Z (2020) A precision-scalable energy-efficient convolutional neural network accelerator. IEEE Trans Circuits Syst I Regul Pap PP(99):1\u201314","journal-title":"IEEE Trans Circuits Syst I Regul Pap"},{"key":"9256_CR57","unstructured":"Lu HY, Wang M, Foroosh H, Tappen M, Penksy M (2015) Sparse convolutional neural networks. In: 2015 IEEE conference on computer vision and pattern recognition (CVPR), pp 806\u2013814"},{"issue":"1","key":"9256_CR58","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1109\/tc.2016.2574353","volume":"66","author":"T Luo","year":"2017","unstructured":"Luo T, Liu S, Li L, Wang Y, Zhang S, Chen T, Xu Z, Temam O, Chen Y (2017) Dadiannao: a neural network supercomputer. IEEE Trans Comput 66(1):73\u201388. https:\/\/doi.org\/10.1109\/tc.2016.2574353","journal-title":"IEEE Trans Comput"},{"key":"9256_CR59","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1016\/j.procs.2017.03.153","volume":"107","author":"X Luo","year":"2017","unstructured":"Luo X, Shen R, Hu J, Deng J, Hu L, Guan Q (2017) A deep convolution neural network model for vehicle recognition and face recognition. Proc Comput Sci 107:715\u2013720. https:\/\/doi.org\/10.1016\/j.procs.2017.03.153","journal-title":"Proc Comput Sci"},{"key":"9256_CR60","doi-asserted-by":"publisher","unstructured":"Ma RR, Niu LF (2018) A survey of sparse-learning methods for deep neural networks. In: 2018 IEEE\/WIC\/ACM international conference on web intelligence (WI 2018), pp 647\u2013650. https:\/\/doi.org\/10.1109\/Wi.2018.00-20","DOI":"10.1109\/Wi.2018.00-20"},{"issue":"7","key":"9256_CR61","doi-asserted-by":"publisher","first-page":"1354","DOI":"10.1109\/tvlsi.2018.2815603","volume":"26","author":"Y Ma","year":"2018","unstructured":"Ma Y, Cao Y, Vrudhula S, Seo J (2018) Optimizing the convolution operation to accelerate deep neural networks on FPGA. IEEE Trans Very Large Scale Integr (VLSI) Syst 26(7):1354\u20131367. https:\/\/doi.org\/10.1109\/tvlsi.2018.2815603","journal-title":"IEEE Trans Very Large Scale Integr (VLSI) Syst"},{"key":"9256_CR62","doi-asserted-by":"publisher","unstructured":"Ma YF, Cao Y, Vrudhula S, Seo JS (2017) Optimizing loop operation and dataflow in FPGA acceleration of deep convolutional neural networks. In: FPGA\u201917: proceedings of the 2017 ACM\/SIGDA international symposium on field-programmable gate arrays, pp 45\u201354. https:\/\/doi.org\/10.1145\/3020078.3021736","DOI":"10.1145\/3020078.3021736"},{"key":"9256_CR63","doi-asserted-by":"publisher","unstructured":"Mair J, Huang ZY, Eyers D, Chen YW (2015) Quantifying the energy efficiency challenges of achieving exascale computing. In: 2015 15th IEEE\/ACM international symposium on cluster, cloud and grid computing, pp 943\u2013950. https:\/\/doi.org\/10.1109\/CCGrid.2015.130","DOI":"10.1109\/CCGrid.2015.130"},{"key":"9256_CR64","doi-asserted-by":"publisher","unstructured":"Marwa GAM, Mohamed B, Najoua C, Hedi BM (2017) Parallelism hardware computation for artificial neural network. In: 2017 IEEE\/ACS 14th international conference on computer systems and applications (AICCSA), pp 1049\u20131055. https:\/\/doi.org\/10.1109\/Aiccsa.2017.166","DOI":"10.1109\/Aiccsa.2017.166"},{"key":"9256_CR65","doi-asserted-by":"crossref","unstructured":"Meiners CR, Liu AX, Torng E (2007) TCAM razor: a systematic approach towards minimizing packet classifiers in TCAMs. In: 2007 IEEE international conference on network protocols, pp 266\u2013275","DOI":"10.1109\/ICNP.2007.4375857"},{"key":"9256_CR66","doi-asserted-by":"publisher","unstructured":"Meloni P, Capotondi A, Deriu G, Brian M, Conti F, Rossi D, Raffo L, Benini L (2018) Neuraghe:exploiting CPU-FPGA synergies for efficient and flexible CNN inference acceleration on ZYNQ SOCS.ACM Trans Reconfig Technol Syst 11(3). https:\/\/doi.org\/10.1145\/3284357","DOI":"10.1145\/3284357"},{"issue":"1\u20133","key":"9256_CR67","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1016\/j.neucom.2010.03.021","volume":"74","author":"J Misra","year":"2010","unstructured":"Misra J, Saha I (2010) Artificial neural networks in hardware a survey of two decades of progress. Neurocomputing 74(1\u20133):239\u2013255. https:\/\/doi.org\/10.1016\/j.neucom.2010.03.021","journal-title":"Neurocomputing"},{"key":"9256_CR68","doi-asserted-by":"crossref","unstructured":"Motamedi M, Gysel P, Akella V, Ghiasi S (2016) Design space exploration of FPGA-based deep convolutional neural networks. In: 2016 21st Asia and South Pacific design automation conference (ASP-DAC), pp 575\u2013580","DOI":"10.1109\/ASPDAC.2016.7428073"},{"key":"9256_CR69","doi-asserted-by":"publisher","unstructured":"Nabavinejad Morteza S (2020) An overview of efficient interconnection networks for deep neural network accelerators. IEEE J Emerg Sel Top Circuits Syst 10(3):268\u2013282. https:\/\/doi.org\/10.1109\/JETCAS.2020.3022920","DOI":"10.1109\/JETCAS.2020.3022920"},{"key":"9256_CR70","doi-asserted-by":"crossref","unstructured":"Nakahara H, Fujii T, Sato S (2017) A fully connected layer elimination for a binarizec convolutional neural network on an fpga. In: 2017 27th international conference on field programmable logic and applications (FPL), pp 1\u20134. IEEE","DOI":"10.23919\/FPL.2017.8056771"},{"issue":"2","key":"9256_CR71","doi-asserted-by":"publisher","first-page":"657","DOI":"10.1109\/TNET.2018.2809583","volume":"26","author":"E Norige","year":"2018","unstructured":"Norige E, Liu AX, Torng E (2018) A ternary unification framework for optimizing TCAM-based packet classification systems. IEEE\/ACM Trans Netw 26(2):657\u2013670","journal-title":"IEEE\/ACM Trans Netw"},{"key":"9256_CR72","doi-asserted-by":"crossref","unstructured":"Nurvitadhi E, Sheffield D, Sim J, Mishra A, Venkatesh G, Marr D (2016) Accelerating binarized neural networks: comparison of FPGA, CPU, GPU, and ASIC. In: 2016 international conference on field-programmable technology (FPT), pp 77\u201384. IEEE","DOI":"10.1109\/FPT.2016.7929192"},{"key":"9256_CR73","doi-asserted-by":"crossref","unstructured":"Peemen M, Setio AA, Mesman B, Corporaal H (2013) Memory-centric accelerator design for convolutional neural networks. In: 2013 IEEE 31st international conference on computer design (ICCD), pp 13\u201319. IEEE","DOI":"10.1109\/ICCD.2013.6657019"},{"key":"9256_CR74","doi-asserted-by":"crossref","unstructured":"Podili A, Zhang C, Prasanna V (2017) Fast and efficient implementation of convolutional neural networks on FPGA. In: 2017 IEEE 28th international conference on application-specific systems, architectures and processors (ASAP), pp 11\u201318","DOI":"10.1109\/ASAP.2017.7995253"},{"key":"9256_CR75","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1016\/j.micpro.2018.04.004","volume":"60","author":"T Posewsky","year":"2018","unstructured":"Posewsky T, Ziener D (2018) Throughput optimizations for FPGA-based deep neural network inference. Microprocess Microsyst 60:151\u2013161. https:\/\/doi.org\/10.1016\/j.micpro.2018.04.004","journal-title":"Microprocess Microsyst"},{"key":"9256_CR76","doi-asserted-by":"publisher","unstructured":"Qiu JT, Wang J, Yao S, Guo KY, Li BX, Zhou EJ, Yu JC, Tang TQ, Xu NY, Song S, Wang Y, Yang HZ (2016) Going deeper with embedded FPGA platform for convolutional neural network. In: Proceedings of the 2016 ACM\/SIGDA international symposium on field-programmable gate arrays (FPGA\u201916), pp 26\u201335. https:\/\/doi.org\/10.1145\/2847263.2847265","DOI":"10.1145\/2847263.2847265"},{"key":"9256_CR77","doi-asserted-by":"crossref","unstructured":"Rahman A, Lee J, Choi K (2016) Efficient FPGA acceleration of convolutional neural networks using logical-3D compute array. In: Proceedings of the 2016 design, automation & test in Europe conference & exhibition (date), pp 1393\u20131398","DOI":"10.3850\/9783981537079_0833"},{"key":"9256_CR78","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1016\/j.vlsi.2018.11.010","volume":"65","author":"AG Scanlan","year":"2019","unstructured":"Scanlan AG (2019) Low power & mobile hardware accelerators for deep convolutional neural networks. Integration 65:110\u2013127. https:\/\/doi.org\/10.1016\/j.vlsi.2018.11.010","journal-title":"Integration"},{"issue":"3","key":"9256_CR79","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1145\/3007787.3001139","volume":"44","author":"A Shafiee","year":"2016","unstructured":"Shafiee A, Nag A, Muralimanohar N, Balasubramonian R, Strachan JP, Hu M, Williams RS, Srikumar V (2016) Isaac: a convolutional neural network accelerator with in-situ analog arithmetic in crossbars. ACM SIGARCH Comput Archit News 44(3):14\u201326","journal-title":"ACM SIGARCH Comput Archit News"},{"issue":"5","key":"9256_CR80","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1109\/MM.2018.053631145","volume":"38","author":"D Shin","year":"2018","unstructured":"Shin D, Lee J, Lee J, Lee J, Yoo HJ (2018) Dnpu: an energy-efficient deep-learning processor with heterogeneous multi-core architecture. IEEE Micro 38(5):85\u201393","journal-title":"IEEE Micro"},{"key":"9256_CR81","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"9256_CR82","doi-asserted-by":"crossref","unstructured":"Song L, Qian X, Li H, Chen Y (2017) Pipelayer: a pipelined reram-based accelerator for deep learning. In: 2017 IEEE international symposium on high performance computer architecture (HPCA), pp 541\u2013552. IEEE","DOI":"10.1109\/HPCA.2017.55"},{"key":"9256_CR83","doi-asserted-by":"crossref","unstructured":"Srinivas S, Babu RV (2015) Data-free parameter pruning for deep neural networks. arXiv preprint arXiv:1507.06149","DOI":"10.5244\/C.29.31"},{"key":"9256_CR84","doi-asserted-by":"publisher","unstructured":"Vansteenkiste E, Al Farisi B, Bruneel K, Stroobandt D (2014) Tpar: place and route tools for the dynamic reconfiguration of the FPGA\u2019s interconnect network. IEEE Trans Comput Aided Des Integr Circuits Syst 33(3):370\u2013383. https:\/\/doi.org\/10.1109\/Tcad.2013.2291659","DOI":"10.1109\/Tcad.2013.2291659"},{"key":"9256_CR85","doi-asserted-by":"crossref","unstructured":"Waldrop MM (2016) The chips are down for Moore\u2019s law. Nat News 530(7589):144","DOI":"10.1038\/530144a"},{"key":"9256_CR86","doi-asserted-by":"publisher","unstructured":"Wang JS, Lou QW, Zhang XF, Zhu C, Lin YH, Chen DM (2018) Design flow of accelerating hybrid extremely low bit-width neural network in embedded FPGA. In: 2018 28th international conference on field programmable logic and applications (FPL), pp 163\u2013169. https:\/\/doi.org\/10.1109\/Fpl.2018.00035","DOI":"10.1109\/Fpl.2018.00035"},{"key":"9256_CR87","doi-asserted-by":"publisher","unstructured":"Wang Y, Xu J, Han YH, Li HW, Li XW (2016) Deepburning: automatic generation of FPGA-based learning accelerators for the neural network family. In: 2016 ACM\/EDAC\/IEEE design automation conference (DAC). https:\/\/doi.org\/10.1145\/2897937.2898003","DOI":"10.1145\/2897937.2898003"},{"issue":"5","key":"9256_CR88","doi-asserted-by":"publisher","first-page":"1009","DOI":"10.1109\/Tcad.2017.2729466","volume":"37","author":"LX Xia","year":"2018","unstructured":"Xia LX, Li BX, Tang TQ, Gu P, Chen PY, Yu SM, Cao Y, Wang Y, Xie Y, Yang HZ (2018) Mnsim: simulation platform for memristor-based neuromorphic computing system. IEEE Trans Comput Aided Des Integr Circuits Syst 37(5):1009\u20131022. https:\/\/doi.org\/10.1109\/Tcad.2017.2729466","journal-title":"IEEE Trans Comput Aided Des Integr Circuits Syst"},{"key":"9256_CR89","doi-asserted-by":"publisher","unstructured":"Xiao QC, Liang Y, Lu LQ, Yan SG, Tai YW (2017) Exploring heterogeneous algorithms for accelerating deep convolutional neural networks on FPGAs. In: Proceedings of the 2017 54th ACM\/EDAC\/IEEE design automation conference (DAC). https:\/\/doi.org\/10.1145\/3061639.3062244","DOI":"10.1145\/3061639.3062244"},{"key":"9256_CR90","doi-asserted-by":"publisher","unstructured":"Yin L, Cheng R, Yao W, Liu C, He J (2021) Emerging 2D memory devices for in-memory computing. Adv Mater 33. https:\/\/doi.org\/10.1002\/adma.202007081","DOI":"10.1002\/adma.202007081"},{"key":"9256_CR91","doi-asserted-by":"crossref","unstructured":"Yu NG, Qiu S, Hu XL, Li JM (2017) Accelerating convolutional neural networks by group-wise 2D-filter pruning. In: 2017 international joint conference on neural networks (IJCNN), pp 2502\u20132509","DOI":"10.1109\/IJCNN.2017.7966160"},{"key":"9256_CR92","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1016\/j.neucom.2016.09.010","volume":"219","author":"S Yu","year":"2017","unstructured":"Yu S, Jia S, Xu C (2017) Convolutional neural networks for hyperspectral image classification. Neurocomputing 219:88\u201398. https:\/\/doi.org\/10.1016\/j.neucom.2016.09.010","journal-title":"Neurocomputing"},{"key":"9256_CR93","doi-asserted-by":"publisher","unstructured":"Zhan C, Fang ZM, Zhou PP, Pan PC, Cong J (2016) Caffeine: towards uniformed representation and acceleration for deep convolutional neural networks. In: 2016 IEEE\/ACM international conference on computer-aided design (ICCAD). https:\/\/doi.org\/10.1145\/2966986.2967011","DOI":"10.1145\/2966986.2967011"},{"key":"9256_CR94","doi-asserted-by":"crossref","unstructured":"Zhang C, Li P, Sun G, Guan Y, Xiao B, Cong J (2015) Optimizing FPGA-based accelerator design for deep convolutional neural networks. In: Proceedings of the 2015 ACM\/SIGDA international symposium on field-programmable gate arrays, pp 161\u2013170. ACM","DOI":"10.1145\/2684746.2689060"},{"key":"9256_CR95","doi-asserted-by":"publisher","unstructured":"Zhang C, Prasanna V (2017) Frequency domain acceleration of convolutional neural networks on CPU-FPGA shared memory system. In: FPGA\u201917: proceedings of the 2017 ACM\/SIGDA international symposium on field-programmable gate arrays, pp 35\u201344. https:\/\/doi.org\/10.1145\/3020078.3021727","DOI":"10.1145\/3020078.3021727"},{"key":"9256_CR96","doi-asserted-by":"publisher","unstructured":"Zhang M, Li LP, Wang H, Liu Y, Qin HB, Zhao W (2019) Optimized compression for implementing convolutional neural networks on FPGA. Electronics 8(3). https:\/\/doi.org\/10.3390\/electronics8030295","DOI":"10.3390\/electronics8030295"},{"key":"9256_CR97","doi-asserted-by":"crossref","unstructured":"Zhang SJ, Du ZD, Zhang L, Lan HY, Liu SL, Li L, Guo Q, Chen TS, Chen YJ (2016) Cambricon-x: an accelerator for sparse neural networks. In: 2016 49th annual IEEE\/ACM international symposium on microarchitecture (Micro)","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"9256_CR98","doi-asserted-by":"publisher","unstructured":"Zhou S, Guo Q, Du Z, Liu D, Chen T, Li L, Liu S, Zhou J, Teman O, Feng X, Zhou X, Chen Y (2019) Paraml: a polyvalent multi-core accelerator for machine learning. IEEE Trans Comput Aided Des Integr Circuits Syst 39(9):1764\u20131777. https:\/\/doi.org\/10.1109\/TCAD.2019.2927523","DOI":"10.1109\/TCAD.2019.2927523"},{"key":"9256_CR99","doi-asserted-by":"publisher","first-page":"171853","DOI":"10.1109\/ACCESS.2019.2954897","volume":"7","author":"X Zhou","year":"2019","unstructured":"Zhou X, Zhang J, Wan J, Zhou L, Wei Z, Zhang J (2019) Scheduling-efficient framework for neural network on heterogeneous distributed systems and mobile edge computing systems. IEEE Access 7:171853\u2013171863","journal-title":"IEEE Access"},{"issue":"7","key":"9256_CR100","doi-asserted-by":"publisher","first-page":"3176","DOI":"10.1109\/Tnnls.2017.2717442","volume":"29","author":"XC Zhou","year":"2018","unstructured":"Zhou XC, Li SL, Tang F, Hu SD, Lin Z, Zhang L (2018) Danoc: an efficient algorithm and hardware codesign of deep neural networks on chip. IEEE Trans Neural Netw Learn Syst 29(7):3176\u20133187. https:\/\/doi.org\/10.1109\/Tnnls.2017.2717442","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9256_CR101","doi-asserted-by":"crossref","unstructured":"Zuo W, Liang Y, Li P, Rupnow K, Chen D, Cong J (2013) Improving high level synthesis optimization opportunity through polyhedral transformations. In: Proceedings of the ACM\/SIGDA international symposium on field programmable gate arrays, pp 9\u201318. ACM","DOI":"10.1145\/2435264.2435271"}],"container-title":["Design Automation for Embedded Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10617-021-09256-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10617-021-09256-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10617-021-09256-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T03:18:08Z","timestamp":1726456688000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10617-021-09256-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,12]]},"references-count":101,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["9256"],"URL":"https:\/\/doi.org\/10.1007\/s10617-021-09256-8","relation":{},"ISSN":["0929-5585","1572-8080"],"issn-type":[{"type":"print","value":"0929-5585"},{"type":"electronic","value":"1572-8080"}],"subject":[],"published":{"date-parts":[[2022,1,12]]},"assertion":[{"value":"10 February 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 November 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}