{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T09:10:01Z","timestamp":1751015401730,"version":"3.41.0"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T00:00:00Z","timestamp":1745971200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T00:00:00Z","timestamp":1745971200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100020593","name":"Fundamental Research Funds for the Central Universities of Beijing University of Chemical Technology","doi-asserted-by":"publisher","award":["FRGS\/1\/2021\/ICT02\/UTAR\/02\/3"],"award-info":[{"award-number":["FRGS\/1\/2021\/ICT02\/UTAR\/02\/3"]}],"id":[{"id":"10.13039\/100020593","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00521-025-11209-2","type":"journal-article","created":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T03:25:04Z","timestamp":1745983504000},"page":"13497-13524","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A compact and flexible FPGA accelerator for regular and octave convolutional neural networks"],"prefix":"10.1007","volume":"37","author":[{"given":"Jin-Chuan","family":"See","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hui-Fuang","family":"Ng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hung-Khoon","family":"Tan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jing-Jing","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4659-8979","authenticated-orcid":false,"given":"Wai-Kong","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,4,30]]},"reference":[{"issue":"12","key":"11209_CR1","doi-asserted-by":"publisher","first-page":"2816","DOI":"10.1109\/TVLSI.2019.2941250","volume":"27","author":"S Kala","year":"2019","unstructured":"Kala S, Jose BR, Mathew J, Nalesh S (2019) High-performance CNN accelerator on FPGA using unified winograd-gemm architecture. IEEE Trans Very Larg Scale Integr (VLSI) Syst 27(12):2816\u20132828. https:\/\/doi.org\/10.1109\/TVLSI.2019.2941250","journal-title":"IEEE Trans Very Larg Scale Integr (VLSI) Syst"},{"issue":"4","key":"11209_CR2","doi-asserted-by":"publisher","first-page":"857","DOI":"10.1109\/TCAD.2019.2897701","volume":"39","author":"Y Liang","year":"2020","unstructured":"Liang Y, Lu L, Xiao Q, Yan S (2020) Evaluating fast algorithms for convolutional neural networks on FPGAs. IEEE Trans Comput Aided Des Integ Circuits Syst 39(4):857\u2013870","journal-title":"IEEE Trans Comput Aided Des Integ Circuits Syst"},{"issue":"12","key":"11209_CR3","doi-asserted-by":"publisher","first-page":"1902","DOI":"10.1109\/TVLSI.2022.3211665","volume":"30","author":"C Yang","year":"2022","unstructured":"Yang C, Meng Y, Huo K, Xi J, Mei K (2022) A sparse CNN accelerator for eliminating redundant computations in intra- and inter-convolutional\/pooling layers. IEEE Trans Very Larg Scale Integr (VLSI) Syst 30(12):1902\u20131915. https:\/\/doi.org\/10.1109\/TVLSI.2022.3211665","journal-title":"IEEE Trans Very Larg Scale Integr (VLSI) Syst"},{"key":"11209_CR4","doi-asserted-by":"publisher","first-page":"1847","DOI":"10.1109\/TC.2021.3110413","volume":"71","author":"W Lou","year":"2022","unstructured":"Lou W, Gong L, Wang C, Du Z, Zhou X (2022) OctCNN: a high throughput fpga accelerator for CNNs using octave convolution algorithm. IEEE Trans Comput 71:1847\u20131859. https:\/\/doi.org\/10.1109\/TC.2021.3110413","journal-title":"IEEE Trans Comput"},{"issue":"2","key":"11209_CR5","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1109\/MCAS.2023.3267921","volume":"23","author":"X Liu","year":"2023","unstructured":"Liu X, Parhi KK (2023) Tensor decomposition for model reduction in neural networks: a review [feature]. IEEE Circuits Syst Mag 23(2):8\u201328. https:\/\/doi.org\/10.1109\/MCAS.2023.3267921","journal-title":"IEEE Circuits Syst Mag"},{"issue":"4","key":"11209_CR6","doi-asserted-by":"publisher","first-page":"5099","DOI":"10.1109\/TPAMI.2022.3200344","volume":"45","author":"Q Zhang","year":"2023","unstructured":"Zhang Q, Cheng X, Chen Y, Rao Z (2023) Quantifying the knowledge in a dnn to explain knowledge distillation for classification. IEEE Trans Pattern Anal Mach Intell 45(4):5099\u20135113. https:\/\/doi.org\/10.1109\/TPAMI.2022.3200344","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"8","key":"11209_CR7","doi-asserted-by":"publisher","first-page":"3974","DOI":"10.1109\/TNNLS.2021.3055240","volume":"33","author":"S Liu","year":"2022","unstructured":"Liu S, Fan H, Ferianc M, Niu X, Shi H, Luk W (2022) Toward full-stack acceleration of deep convolutional neural networks on FPGAs. IEEE Trans Neural Netw Learn Syst 33(8):3974\u20133987. https:\/\/doi.org\/10.1109\/TNNLS.2021.3055240","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"8","key":"11209_CR8","doi-asserted-by":"publisher","first-page":"4069","DOI":"10.1109\/TNNLS.2021.3055814","volume":"33","author":"W Huang","year":"2022","unstructured":"Huang W, Wu H, Chen Q, Luo C, Zeng S, Li T, Huang Y (2022) FPGA-based high-throughput CNN hardware accelerator with high computing resource utilization ratio. IEEE Trans Neural Netw Learn Syst 33(8):4069\u20134083. https:\/\/doi.org\/10.1109\/TNNLS.2021.3055814","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11209_CR9","doi-asserted-by":"publisher","unstructured":"Li J, Chen J, Un K-F, Yu W-H, Mak P-I, Martins RP (2021) A 50.4 GOPs\/W FPGA-Based MobileNetV2 Accelerator using the Double-Layer MAC and DSP Efficiency Enhancement. In: 2021 IEEE Asian Solid-State Circuits Conference (A-SSCC), pp 1\u20133 . https:\/\/doi.org\/10.1109\/A-SSCC53895.2021.9634838","DOI":"10.1109\/A-SSCC53895.2021.9634838"},{"key":"11209_CR10","doi-asserted-by":"publisher","first-page":"3279","DOI":"10.1109\/TCSI.2021.3078541","volume":"68","author":"B Li","year":"2021","unstructured":"Li B, Wang H, Zhang X, Ren J, Liu L, Sun H, Zheng N (2021) Dynamic dataflow scheduling and computation mapping techniques for efficient depthwise separable convolution acceleration. IEEE Trans Circuits Syst I Regul Papers 68:3279\u20133292. https:\/\/doi.org\/10.1109\/TCSI.2021.3078541","journal-title":"IEEE Trans Circuits Syst I Regul Papers"},{"issue":"3","key":"11209_CR11","doi-asserted-by":"publisher","first-page":"1185","DOI":"10.1109\/TCSI.2021.3131581","volume":"69","author":"X Wu","year":"2022","unstructured":"Wu X, Ma Y, Wang M, Wang Z (2022) A flexible and efficient FPGA accelerator for various large-scale and lightweight cnns. IEEE Trans Circuits Syst I Regul Papers 69(3):1185\u20131198","journal-title":"IEEE Trans Circuits Syst I Regul Papers"},{"key":"11209_CR12","doi-asserted-by":"crossref","unstructured":"Chang S, Li Y, Sun M, Shi R, So HH, Qian X, Wang Y, Lin X (2021) Mix and match: a novel fpga-centric deep neural network quantization framework. In: 2021 IEEE international symposium on high-performance computer architecture (HPCA), pp 208\u2013220","DOI":"10.1109\/HPCA51647.2021.00027"},{"issue":"3","key":"11209_CR13","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1109\/TVLSI.2020.3046125","volume":"29","author":"S Colleman","year":"2021","unstructured":"Colleman S, Verhelst M (2021) High-utilization, high-flexibility depth-first CNN coprocessor for image pixel processing on FPGA. IEEE Trans Very Larg Scale Integr (VLSI) Syst 29(3):461\u2013471. https:\/\/doi.org\/10.1109\/TVLSI.2020.3046125","journal-title":"IEEE Trans Very Larg Scale Integr (VLSI) Syst"},{"key":"11209_CR14","doi-asserted-by":"publisher","unstructured":"Zhuge C, Liu X, Zhang X, Gummadi S, Xiong J, Chen D (2018) Face Recognition with Hybrid Efficient Convolution Algorithms on FPGAs. In: Proceedings of the 2018 on Great Lakes Symposium On VLSI. GLSVLSI \u201918, pp. 123\u2013128. Association for Computing Machinery, ??? https:\/\/doi.org\/10.1145\/3194554.3194597","DOI":"10.1145\/3194554.3194597"},{"key":"11209_CR15","doi-asserted-by":"publisher","unstructured":"Chen Y, Fan H, Xu B, Yan Z, Kalantidis Y, Rohrbach M, Shuicheng Y, Feng J (2019) Drop an Octave: reducing spatial redundancy in convolutional neural networks with octave convolution. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), pp 3434\u20133443 . https:\/\/doi.org\/10.1109\/ICCV.2019.00353 . ISSN: 2380-7504","DOI":"10.1109\/ICCV.2019.00353"},{"key":"11209_CR16","unstructured":"Optimizing Convolutional Layers. NVIDIA Corporation (2023). https:\/\/docs.nvidia.com\/deeplearning\/performance\/dl-performance-convolutional\/index.html"},{"key":"11209_CR17","doi-asserted-by":"publisher","unstructured":"Selvam S, Ganesan V, Kumar P (2021) FuSeConv: fully separable convolutions for fast inference on systolic arrays. arXiv. arXiv:2105.13434 [cs] . https:\/\/doi.org\/10.48550\/arXiv.2105.13434 . http:\/\/arxiv.org\/abs\/2105.13434","DOI":"10.48550\/arXiv.2105.13434"},{"key":"11209_CR18","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1109\/TCAD.2018.2824280","volume":"38","author":"S Lee","year":"2019","unstructured":"Lee S, Kim D, Nguyen D, Lee J (2019) Double MAC on a DSP: boosting the performance of convolutional neural networks on FPGAs. IEEE Trans Comput Aided Des Integr Circuits Syst 38:888\u2013897. https:\/\/doi.org\/10.1109\/TCAD.2018.2824280","journal-title":"IEEE Trans Comput Aided Des Integr Circuits Syst"},{"key":"11209_CR19","doi-asserted-by":"publisher","first-page":"100993","DOI":"10.1109\/ACCESS.2019.2931161","volume":"7","author":"Z Huang","year":"2019","unstructured":"Huang Z, Zhang S, Wang W (2019) An efficient method of parallel multiplication on a single DSP slice for embedded FPGAs. IEEE Access 7:100993\u2013101008. https:\/\/doi.org\/10.1109\/ACCESS.2019.2931161","journal-title":"IEEE Access"},{"key":"11209_CR20","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Adv Neural Inform Process Syst Vol 25"},{"key":"11209_CR21","doi-asserted-by":"publisher","unstructured":"Samajdar A, Garg T, Krishna T, Kapre N (2019) Scaling the Cascades: interconnect-aware FPGA implementation of machine learning problems, pp 342\u2013349 https:\/\/doi.org\/10.1109\/FPL.2019.00061 . ISSN: 1946-1488","DOI":"10.1109\/FPL.2019.00061"},{"key":"11209_CR22","unstructured":"AMD Xilinx: (2022) UltraScale Architecture-Based FPGAs Memory IP. AMD Xilinx. v1.4"},{"key":"11209_CR23","doi-asserted-by":"publisher","unstructured":"Abdelfattah MS, Han D, Bitar A, DiCecco R, O\u2019Connell S, Shanker N, Chu J, Prins I, Fender J, Ling AC, Chiu GR (2018) Dla: Compiler and fpga overlay for neural network inference acceleration. In: 2018 28th international conference on field programmable logic and applications (FPL), pp 411\u20134117 . https:\/\/doi.org\/10.1109\/FPL.2018.00077","DOI":"10.1109\/FPL.2018.00077"},{"key":"11209_CR24","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778 . https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"11209_CR25","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L (2018) Mobilenetv2: Inverted residuals and linear bottlenecks. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"key":"11209_CR26","doi-asserted-by":"publisher","unstructured":"Chen Y, He J, Zhang X, Hao C, Chen D (2019) Cloud-DNN: An open framework for mapping dnn models to cloud FPGAs. In: Proceedings of the 2019 ACM\/SIGDA international symposium on field-programmable gate arrays, pp 73\u201382 . https:\/\/doi.org\/10.1145\/3289602.3293915","DOI":"10.1145\/3289602.3293915"},{"issue":"4","key":"11209_CR27","first-page":"2301","volume":"69","author":"X Hu","year":"2022","unstructured":"Hu X, Li X, Huang H, Zheng X, Xiong X (2022) TiNNA: a tiny accelerator for neural networks with efficient DSP optimization. IEEE Trans Circuits Syst II Express Briefs 69(4):2301\u20132305","journal-title":"IEEE Trans Circuits Syst II Express Briefs"},{"key":"11209_CR28","doi-asserted-by":"crossref","unstructured":"Yu Y, Zhao T, Wang K, He L (2020) Light-OPU: an FPGA-based overlay processor for lightweight convolutional neural networks. In: Proceedings of the 2020 ACM\/SIGDA international symposium on field-programmable gate arrays, pp 122\u2013132","DOI":"10.1145\/3373087.3375311"},{"key":"11209_CR29","doi-asserted-by":"crossref","unstructured":"Yang C, Meng Y, Xi J, Xiang S, Wang J, Mei K (2023) Wra-ss: A high-performance accelerator integrating winograd with structured sparsity for convolutional neural networks. IEEE Trans Very Large Scale Integr (VLSI) Syst","DOI":"10.1109\/TVLSI.2023.3330993"},{"key":"11209_CR30","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv. http:\/\/arxiv.org\/abs\/1704.04861"},{"key":"11209_CR31","doi-asserted-by":"crossref","unstructured":"Howard A, Sandler M, Chu G, Chen L-C, Chen B, Tan M, Wang W, Zhu Y, Pang R, Vasudevan V, Le QV, Adam H (2019) Searching for mobilenetv3. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"11209_CR32","doi-asserted-by":"crossref","unstructured":"Zhang X, Zhou X, Lin M, Sun J (2018) ShuffleNet: an extremely efficient convolutional neural network for mobile devices. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 6848\u20136856","DOI":"10.1109\/CVPR.2018.00716"},{"key":"11209_CR33","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K (2016) SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and<0.5MB model size. arXiv . http:\/\/arxiv.org\/abs\/1602.07360"},{"key":"11209_CR34","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1109\/MM.2019.2928962","volume":"39","author":"T Moreau","year":"2019","unstructured":"Moreau T, Chen T, Vega L, Roesch J, Yan E, Zheng L, Fromm J, Jiang Z, Ceze L, Guestrin C, Krishnamurthy A (2019) A hardware-software blueprint for flexible deep learning specialization. IEEE Micro 39:8\u201316","journal-title":"IEEE Micro"},{"issue":"8","key":"11209_CR35","doi-asserted-by":"publisher","first-page":"1209","DOI":"10.1002\/cta.2797","volume":"48","author":"J-C See","year":"2020","unstructured":"See J-C, Mok K-M, Lee W-K, Goh H-G (2020) Risc32-e: field programmable gate array based sensor node with queue system to support fast encryption in industrial internet of things applications. Int J Circuit Theory Appl 48(8):1209\u20131226","journal-title":"Int J Circuit Theory Appl"},{"key":"11209_CR36","doi-asserted-by":"crossref","unstructured":"Zeng S, Liu J, Dai G, Yang X, Fu T, Wang H, Ma W, Sun H, Li S, Huang Z (2024) Flightllm: efficient large language model inference with a complete mapping flow on fpgas. In: Proceedings of the 2024 ACM\/SIGDA international symposium on field programmable gate arrays, pp 223\u2013234","DOI":"10.1145\/3626202.3637562"},{"issue":"1","key":"11209_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s12559-024-10376-z","volume":"17","author":"L Chen","year":"2025","unstructured":"Chen L, Yang L, Jie T, Haoyuan M, Yu L, Shenbing F, Wang J, Wu H, Li G (2025) Enhanced self-attention-based rapid cnn for detecting dense objects in varying illumination. Cognitive Comput 17(1):1\u201320","journal-title":"Cognitive Comput"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11209-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11209-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11209-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T08:28:20Z","timestamp":1751012900000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11209-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,30]]},"references-count":37,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["11209"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11209-2","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2025,4,30]]},"assertion":[{"value":"17 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 April 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest or Conflict of interest with any parties.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest\/Conflict of interest:"}}]}}