{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T05:10:02Z","timestamp":1749877802277,"version":"3.41.0"},"reference-count":31,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"11","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Electron. Express"],"published-print":{"date-parts":[[2025,6,10]]},"DOI":"10.1587\/elex.22.20250246","type":"journal-article","created":{"date-parts":[[2025,5,7]],"date-time":"2025-05-07T22:06:46Z","timestamp":1746655606000},"page":"20250246-20250246","source":"Crossref","is-referenced-by-count":0,"title":["LMAI2C: Low memory access Im2col method for CNN inference"],"prefix":"10.1587","volume":"22","author":[{"given":"Mengda","family":"Li","sequence":"first","affiliation":[{"name":"Institute of Microelectronics of Chinese Academy of Sciences"},{"name":"University of Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziyi","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Microelectronics of Chinese Academy of Sciences"},{"name":"University of Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiangen","family":"Hong","sequence":"additional","affiliation":[{"name":"Institute of Microelectronics of Chinese Academy of Sciences"},{"name":"University of Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Microelectronics of Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoran","family":"Hao","sequence":"additional","affiliation":[{"name":"Institute of Microelectronics of Chinese Academy of Sciences"},{"name":"University of Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Microelectronics of Chinese Academy of Sciences"},{"name":"University of Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mao","family":"Ni","sequence":"additional","affiliation":[{"name":"Institute of Microelectronics of Chinese Academy of Sciences"},{"name":"University of Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"532","reference":[{"doi-asserted-by":"crossref","unstructured":"[1] Y. LeCun, <i>et al<\/i>.: \u201cDeep learning,\u201d Nature <b>521<\/b> (2015) 436 (DOI: 10.1038\/nature14539).","key":"1","DOI":"10.1038\/nature14539"},{"doi-asserted-by":"crossref","unstructured":"[2] V.A. Ashwath, <i>et al<\/i>.: \u201cTS-CNN: a three-tier self-interpretable CNN for multi-region medical image classification,\u201d IEEE Access <b>11<\/b> (2023) 78402 (DOI: 10.1109\/ACCESS.2023.3299850).","key":"2","DOI":"10.1109\/ACCESS.2023.3299850"},{"doi-asserted-by":"crossref","unstructured":"[3] A. Krizhevsky, <i>et al<\/i>.: \u201cImageNet classification with deep convolutional neural networks,\u201d Communications of the ACM <b>60<\/b> (2017) 84 (DOI: 10.1145\/3065386).","key":"3","DOI":"10.1145\/3065386"},{"doi-asserted-by":"crossref","unstructured":"[4] Z. Zou, <i>et al<\/i>.: \u201cObject detection in 20 years: a survey,\u201d Proc. IEEE <b>111<\/b> (2023) 257 (DOI: 10.1109\/JPROC.2023.3238524).","key":"4","DOI":"10.1109\/JPROC.2023.3238524"},{"doi-asserted-by":"crossref","unstructured":"[5] H. Gao, <i>et al<\/i>.: \u201cObject classification using CNN-based fusion of vision and LIDAR in autonomous vehicle environment,\u201d IEEE Trans. Ind. Informat. <b>14<\/b> (2018) 4224 (DOI: 10.1109\/TII.2018.2822828).","key":"5","DOI":"10.1109\/TII.2018.2822828"},{"doi-asserted-by":"crossref","unstructured":"[6] Q. Zou, <i>et al<\/i>.: \u201cRobust lane detection from continuous driving scenes using deep neural networks,\u201d IEEE Trans. Veh. Technol. <b>69<\/b> (2020) 41 (DOI: 10.1109\/TVT.2019.2949603).","key":"6","DOI":"10.1109\/TVT.2019.2949603"},{"doi-asserted-by":"crossref","unstructured":"[7] Q. Liu and S. Zhou: \u201cLightFusion: lightweight CNN architecture for enabling efficient sensor fusion in free road segmentation of autonomous driving,\u201d IEEE Trans. Circuits Syst. II, Exp. Briefs <b>71<\/b> (2024) 4296 (DOI: 10.1109\/TCSII.2024.3384419).","key":"7","DOI":"10.1109\/TCSII.2024.3384419"},{"doi-asserted-by":"crossref","unstructured":"[8] T. He, <i>et al<\/i>.: \u201cText-attentional convolutional neural network for scene text detection,\u201d IEEE Trans. Image Process. <b>25<\/b> (2016) 2529 (DOI: 10.1109\/TIP.2016.2547588).","key":"8","DOI":"10.1109\/TIP.2016.2547588"},{"doi-asserted-by":"crossref","unstructured":"[9] X. Guo, <i>et al<\/i>.: \u201cA single attention-based combination of CNN and RNN for relation classification,\u201d IEEE Access <b>7<\/b> (2019) 12467 (DOI: 10.1109\/ACCESS.2019.2891770).","key":"9","DOI":"10.1109\/ACCESS.2019.2891770"},{"doi-asserted-by":"crossref","unstructured":"[10] Y. Chen, <i>et al<\/i>.: \u201cDetecting traffic information from social media texts with deep learning approaches,\u201d IEEE Trans. Intell. Transp. Syst. <b>20<\/b> (2019) 3049 (DOI: 10.1109\/TITS.2018.2871269).","key":"10","DOI":"10.1109\/TITS.2018.2871269"},{"doi-asserted-by":"crossref","unstructured":"[11] E. Hanson, <i>et al<\/i>.: \u201cCascading structured pruning: enabling high data reuse for sparse DNN accelerators,\u201d Proc. 49th Annual International Symposium on Computer Architecture (2022) 525 (DOI: 10.1145\/3470496.3527419).","key":"11","DOI":"10.1145\/3470496.3527419"},{"doi-asserted-by":"crossref","unstructured":"[12] J. Lee, <i>et al<\/i>.: \u201cUNPU: an energy-efficient deep neural network accelerator with fully variable weight bit precision,\u201d IEEE J. Solid-State Circuits <b>54<\/b> (2019) 173 (DOI: 10.1109\/jssc.2018.2865489).","key":"12","DOI":"10.1109\/JSSC.2018.2865489"},{"doi-asserted-by":"crossref","unstructured":"[13] Z. Du, <i>et al<\/i>.: \u201cShiDianNao: shifting vision processing closer to the sensor,\u201d 2015 ACM\/IEEE 42nd Annual International Symposium on Computer Architecture (2015) 92 (DOI: 10.1145\/2749469.2750389).","key":"13","DOI":"10.1145\/2749469.2750389"},{"unstructured":"[14] N.P. Jouppi, <i>et al<\/i>.: \u201cIn-datacenter performance analysis of a tensor processing unit,\u201d 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (2017) 1 (DOI: 10.1145\/3079856.3080246).","key":"14"},{"doi-asserted-by":"crossref","unstructured":"[15] R. Xu, <i>et al<\/i>.: \u201cHeSA: heterogeneous systolic array architecture for compact CNNs hardware accelerators,\u201d 2021 Design, Automation &amp; Test in Europe Conference &amp; Exhibition (2021) 657 (DOI: 10.23919\/DATE51398.2021.9474145).","key":"15","DOI":"10.23919\/DATE51398.2021.9474145"},{"doi-asserted-by":"crossref","unstructured":"[16] J.-C. See, <i>et al<\/i>.: \u201cCryptensor: a resource-shared co-processor to accelerate convolutional neural network and polynomial convolution,\u201d IEEE Trans. Comput.-Aided Des. Integr. Circuits Syst. <b>42<\/b> (2023) 4735 (DOI: 10.1109\/TCAD.2023.3296375).","key":"16","DOI":"10.1109\/TCAD.2023.3296375"},{"doi-asserted-by":"crossref","unstructured":"[17] E. Medina and E. Dagan: \u201cHabana labs purpose-built AI inference and training processor architectures: scaling AI training systems using standard ethernet with gaudi processor,\u201d IEEE Micro <b>40<\/b> (2020) 17 (DOI: 10.1109\/MM.2020.2975185).","key":"17","DOI":"10.1109\/MM.2020.2975185"},{"doi-asserted-by":"crossref","unstructured":"[18] A. Yang: \u201cDeep learning training at scale spring crest deep learning accelerator (Intel\u00ae Nervana\u2122 NNP-T),\u201d 2019 IEEE Hot Chips 31 Symposium (2019) 1 (DOI: 10.1109\/HOTCHIPS.2019.8875643).","key":"18","DOI":"10.1109\/HOTCHIPS.2019.8875643"},{"doi-asserted-by":"crossref","unstructured":"[19] Y.-H. Chen, <i>et al<\/i>.: \u201cEyeriss: a spatial architecture for energy-efficient dataflow for convolutional neural networks,\u201d SIGARCH Comput. Archit. News <b>44<\/b> (2016) 367 (DOI: 10.1145\/3007787.3001177).","key":"19","DOI":"10.1145\/3007787.3001177"},{"doi-asserted-by":"crossref","unstructured":"[20] Y.-H. Chen, <i>et al<\/i>.: \u201cEyeriss v2: a flexible accelerator for emerging deep neural networks on mobile devices,\u201d IEEE J. Emerg. Sel. Topics Circuits Syst. <b>9<\/b> (2019) 292 (DOI: 10.1109\/JETCAS.2019.2910232).","key":"20","DOI":"10.1109\/JETCAS.2019.2910232"},{"unstructured":"[21] S. Venkataramani, <i>et al<\/i>.: \u201cRaPiD: AI accelerator for ultra-low precision training and inference,\u201d 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (2021) 153 (DOI: 10.1109\/ISCA52012.2021.00021).","key":"21"},{"doi-asserted-by":"crossref","unstructured":"[22] L.S. Blackford, <i>et al<\/i>.: \u201cAn updated set of basic linear algebra subprograms (BLAS),\u201d ACM Trans. Math. Softw <b>28<\/b> (2002) 135 (DOI: 10.1145\/567806.567807).","key":"22","DOI":"10.1145\/567806.567807"},{"unstructured":"[23] S. Chetlur, <i>et al<\/i>.: \u201ccuDNN: efficient primitives for deep learning,\u201d arXiv preprint (2014) arXiv:1410.0759 (DOI: 10.48550\/arXiv.1410.0759).","key":"23"},{"doi-asserted-by":"crossref","unstructured":"[24] M. Soltaniyeh, <i>et al<\/i>.: \u201cAn accelerator for sparse convolutional neural networks leveraging systolic general matrix-matrix multiplication,\u201d ACM Trans. Archit. Code Optim. <b>19<\/b> (2022) 42 (DOI: 10.1145\/3532863).","key":"24","DOI":"10.1145\/3532863"},{"doi-asserted-by":"crossref","unstructured":"[25] J. Qiu, <i>et al<\/i>.: \u201cGoing deeper with embedded FPGA platform for convolutional neural network,\u201d 2016 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays ( 2016) 26 (DOI: 10.1145\/2847263.2847265).","key":"25","DOI":"10.1145\/2847263.2847265"},{"doi-asserted-by":"crossref","unstructured":"[26] H. Kim, <i>et al<\/i>.: \u201cDuplo: lifting redundant memory accesses of deep neural networks for GPU tensor cores,\u201d 2020 53rd Annual IEEE\/ACM International Symposium on Microarchitecture (2020) 725 (DOI: 10.1109\/MICRO50266.2020.00065).","key":"26","DOI":"10.1109\/MICRO50266.2020.00065"},{"doi-asserted-by":"crossref","unstructured":"[27] A. Gondimalla, <i>et al<\/i>.: \u201cSparTen: a sparse tensor accelerator for convolutional neural networks,\u201d Proc. 52nd Annual IEEE\/ACM International Symposium on Microarchitecture (2019) 151 (DOI: 10.1145\/3352460.3358291).","key":"27","DOI":"10.1145\/3352460.3358291"},{"doi-asserted-by":"crossref","unstructured":"[28] H. Genc, <i>et al<\/i>.: \u201cGemmini: enabling systematic deep-learning architecture evaluation via full-stack integration,\u201d 2021 58th ACM\/IEEE Design Automation Conference (2021) 769 (DOI: 10.1109\/DAC18074.2021.9586216).","key":"28","DOI":"10.1109\/DAC18074.2021.9586216"},{"doi-asserted-by":"crossref","unstructured":"[29] H. Zhang, <i>et al<\/i>.: \u201cAutomated feature map padding and transfer circuit for CNN inference,\u201d IEICE Electron. Express <b>21<\/b> (2024) 20240559 (DOI: 10.1587\/elex.21.20240559).","key":"29","DOI":"10.1587\/elex.21.20240559"},{"doi-asserted-by":"crossref","unstructured":"[30] Y. Zhou, <i>et al<\/i>.: \u201cCharacterizing and demystifying the implicit convolution algorithm on commercial matrix-multiplication accelerators,\u201d 2021 IEEE International Symposium on Workload Characterization (2021) 214 (DOI: 10.1109\/IISWC53511.2021.00029).","key":"30","DOI":"10.1109\/IISWC53511.2021.00029"},{"doi-asserted-by":"crossref","unstructured":"[31] J. Fornt, <i>et al<\/i>.: \u201cAn energy-efficient GeMM-based convolution accelerator with on-the-fly im2col,\u201d IEEE Trans. Very Large Scale Integr. (VLSI) Syst. <b>31<\/b> (2023) 1874 (DOI: 10.1109\/TVLSI.2023.3286122).","key":"31","DOI":"10.1109\/TVLSI.2023.3286122"}],"container-title":["IEICE Electronics Express"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/elex\/22\/11\/22_22.20250246\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T04:28:43Z","timestamp":1749875323000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/elex\/22\/11\/22_22.20250246\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,10]]},"references-count":31,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.1587\/elex.22.20250246","relation":{},"ISSN":["1349-2543"],"issn-type":[{"type":"electronic","value":"1349-2543"}],"subject":[],"published":{"date-parts":[[2025,6,10]]},"article-number":"22.20250246"}}