{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,1,4]],"date-time":"2023-01-04T06:29:48Z","timestamp":1672813788261},"reference-count":31,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"15","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Electron. Express"],"published-print":{"date-parts":[[2021,8,10]]},"DOI":"10.1587\/elex.18.20210252","type":"journal-article","created":{"date-parts":[[2021,7,8]],"date-time":"2021-07-08T22:08:03Z","timestamp":1625782083000},"page":"20210252-20210252","source":"Crossref","is-referenced-by-count":0,"title":["A proposal for enhancing training speed in deep learning models based on memory activity survey"],"prefix":"10.1587","volume":"18","author":[{"given":"Dang Tuan","family":"Kiet","sequence":"first","affiliation":[{"name":"University of Electro-Communications (UEC)"}]},{"given":"Binh","family":"Kieu-Do-Nguyen","sequence":"additional","affiliation":[{"name":"University of Electro-Communications (UEC)"}]},{"given":"Trong-Thuc","family":"Hoang","sequence":"additional","affiliation":[{"name":"University of Electro-Communications (UEC)"}]},{"given":"Khai-Duy","family":"Nguyen","sequence":"additional","affiliation":[{"name":"University of Electro-Communications (UEC)"}]},{"given":"Xuan-Tu","family":"Tran","sequence":"additional","affiliation":[{"name":"The Information Technology Institute (VNU-ITI)"}]},{"given":"Cong-Kha","family":"Pham","sequence":"additional","affiliation":[{"name":"University of Electro-Communications (UEC)"}]}],"member":"532","reference":[{"key":"1","unstructured":"[1] J. Liu, <i>et al.<\/i>: \u201cPerformance analysis and characterization of training deep learning models on mobile device,\u201d ICPADS (2019) 506 (DOI: 10.1109\/ICPADS47876.2019.00077)."},{"key":"2","unstructured":"[2] K. Guo, <i>et al.<\/i>: \u201cA survey of FPGA-based neural network accelerator,\u201d ACM TRETS <b>12<\/b> (2019) 2 (DOI: 10.1145\/3289185)."},{"key":"3","unstructured":"[3] NVIDIA: Profiler user\u2019s guide, DU-05982-001_v11.2 (2021) https:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_Profiler_Users_Guide.pdf."},{"key":"4","unstructured":"[4] NVIDIA: Jetson Linux driver package software features (2019) https:\/\/docs.nvidia.com\/jetson\/archives\/l4t-archived\/l4t-3231\/index.html#page\/Tegra%2520Linux%2520Driver%25-20Package%2520Development%2520Guide%2FAppendix-TegraStats.html."},{"key":"5","unstructured":"[5] TensorFlow Profiler: Profile model performance (2021) https:\/\/www.tensorflow.org\/tensorboard\/tensorboard_profiling_keras."},{"key":"6","unstructured":"[6] M. Hashemi, <i>et al.<\/i>: \u201cLearning memory access patterns,\u201d arXiv cs.LG (2018) https:\/\/arxiv.org\/abs\/1803.02329."},{"key":"7","unstructured":"[7] Z. Lu, <i>et al.<\/i>: \u201cModeling the resource requirements of convolutional neural networks on mobile devices,\u201d ACM MM\u201917 (2017) 1663 (DOI: 10.1145\/3123266.3123389)."},{"key":"8","unstructured":"[8] J. Hanhirova, <i>et al.<\/i>: \u201cLatency and throughput characterization of convolutional neural networks for mobile computer vision,\u201d ACM MMSys\u201918 (2018) 204 (DOI: 10.1145\/3204949.3204975)."},{"key":"9","unstructured":"[9] A. Khan, <i>et al.<\/i>: \u201cA survey of the recent architectures of deep convolutional neural networks,\u201d Springer AI Review <b>53<\/b> (2020) 5455 (DOI: 10.1007\/s10462-020-09825-6)."},{"key":"10","unstructured":"[10] J. Gu, <i>et al.<\/i>: \u201cRecent advances in convolutional neural networks,\u201d Elsevier Patt. Recog. <b>77<\/b> (2018) 354 (DOI: 10.1016\/j.patcog.2017.10.013)."},{"key":"11","unstructured":"[11] K. He, <i>et al.<\/i>: \u201cDeep residual learning for image recognition,\u201d CVPR (2016) 770 (DOI: 10.1109\/CVPR.2016.90)."},{"key":"12","unstructured":"[12] A. Krizhevsky, <i>et al.<\/i>: \u201cImageNet classification with deep convolutional neural networks,\u201d Communications of the ACM <b>60<\/b> (2017) 84 (DOI: 10.1145\/3065386)."},{"key":"13","unstructured":"[13] K. Simonyan and A. Zisserman: \u201cVery deep convolutional networks for large-scale image recognition,\u201d arXiv cs.CV (2015) http:\/\/arxiv.org\/abs\/1409.1556."},{"key":"14","unstructured":"[14] C. Szegedy, <i>et al.<\/i>: \u201cGoing deeper with convolutions,\u201d CVPR (2015) 1 (DOI: 10.1109\/CVPR.2015.7298594)."},{"key":"15","unstructured":"[15] ImageNet (2021) http:\/\/image-net.org\/."},{"key":"16","unstructured":"[16] CIFAR-10\/CIFAR-100 (2021) https:\/\/www.cs.toronto.edu\/~kriz\/cifar.html."},{"key":"17","unstructured":"[17] Y. LeCun, <i>et al.<\/i>: The MNIST Database of Handwritten Digits (2021) http:\/\/yann.lecun.com\/exdb\/mnist\/."},{"key":"18","unstructured":"[18] TensorFlow (2021) https:\/\/www.tensorflow.org\/."},{"key":"19","unstructured":"[19] PyTorch (2021) https:\/\/pytorch.org\/."},{"key":"20","unstructured":"[20] Keras (2021) https:\/\/keras.io\/."},{"key":"21","unstructured":"[21] OpenNN: Neural Networks (2021) https:\/\/www.opennn.net\/."},{"key":"22","unstructured":"[22] A. Paszke, <i>et al.<\/i>: \u201cPyTorch: an imperative style, high-performance deep learning library,\u201d NeurIPS <b>32<\/b> (2019) 1 https:\/\/arxiv.org\/abs\/1912.01703."},{"key":"23","unstructured":"[23] OpenCV (2021) https:\/\/opencv.org\/."},{"key":"24","unstructured":"[24] PyTorch: TorchVision (2021) https:\/\/github.com\/pytorch\/vision."},{"key":"25","unstructured":"[25] J. Dorsey, <i>et al.<\/i>: \u201cAn integrated quad-core opteron processor,\u201d ISSCC (2007) 102 (DOI: 10.1109\/ISSCC.2007.373608)."},{"key":"26","unstructured":"[26] Y. Yarom, <i>et al.<\/i>: \u201cMapping the Intel last-level cache,\u201d IACR (2015) 905 https:\/\/eprint.iacr.org\/2015\/905."},{"key":"27","doi-asserted-by":"crossref","unstructured":"[27] Y. Solihin: <i>Fundamentals of Parallel Multicore Architecture<\/i> (Chapman &amp; Hall\/CRC, Boca Raton, Florida, 2015) 160.","DOI":"10.1201\/b20200"},{"key":"28","unstructured":"[28] Intel Corp.: Intel VTune profiler user guide (2021) https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/documentation\/vtune-help\/top\/reference\/cpu-metrics-reference\/clockticks-per-instructions-retired-cpi.html."},{"key":"29","unstructured":"[29] Intel Corp.: Intel VTune profiler performance analysis cookbook (2020) https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/documentation\/vtune-cookbook\/top\/tuning-recipes\/instruction-cache-misses.html"},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] J.D. Bakos: <i>Embedded Systems<\/i> (Morgan Kaufmann, Boston, Massachusetts, 2016) 147.","DOI":"10.1016\/B978-0-12-800342-8.00004-3"},{"key":"31","unstructured":"[31] Intel Corp.: Memory access analysis for cache misses and high bandwidth issues (2020) https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/documentation\/vtune-help\/top\/analyze-performance\/microarchitecture-analysis-group\/memory-access-analysis.html."}],"container-title":["IEICE Electronics Express"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/elex\/18\/15\/18_18.20210252\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,3]],"date-time":"2023-01-03T11:51:19Z","timestamp":1672746679000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/elex\/18\/15\/18_18.20210252\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,10]]},"references-count":31,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2021]]}},"URL":"https:\/\/doi.org\/10.1587\/elex.18.20210252","relation":{},"ISSN":["1349-2543"],"issn-type":[{"value":"1349-2543","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,8,10]]},"article-number":"18.20210252"}}