{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:08:55Z","timestamp":1766268535353,"version":"3.37.3"},"reference-count":25,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,9,9]],"date-time":"2023-09-09T00:00:00Z","timestamp":1694217600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,9]],"date-time":"2023-09-09T00:00:00Z","timestamp":1694217600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62002365","62025208"],"award-info":[{"award-number":["62002365","62025208"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021YFBO300101"],"award-info":[{"award-number":["2021YFBO300101"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1007\/s42514-023-00166-8","type":"journal-article","created":{"date-parts":[[2023,9,9]],"date-time":"2023-09-09T09:02:11Z","timestamp":1694250131000},"page":"78-93","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["High performance dilated convolutions on multi-core DSPs"],"prefix":"10.1007","volume":"6","author":[{"given":"Yang","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8286-6566","authenticated-orcid":false,"given":"Qinglin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangdong","family":"Pei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Songzhu","family":"Mei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rongchun","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,9]]},"reference":[{"key":"166_CR1","unstructured":"Arm Corporation: ARM Computer Library: a software library for machine learning. https:\/\/www.arm.com\/technologies\/compute-library. Online, accessed 3-Jan-2023 (2023)"},{"key":"166_CR2","unstructured":"Chaudhary, N., Misra, S., Kalamkar, D., Heinecke, A., Georganas, E., Ziv, B., Adelman, M., Kaul, B.: Efficient and generic 1d dilated convolution layer for deep learning. arXiv preprint arXiv:2104.08002 (2021)"},{"key":"166_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Q., Xu, J., Koltun, V.: Fast image processing with fully-convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2497\u20132506 (2017a)","DOI":"10.1109\/ICCV.2017.273"},{"issue":"4","key":"166_CR4","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen, L.-C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFS. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2017b)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"166_CR5","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1109\/TVLSI.2022.3233882","volume":"31","author":"D Filippas","year":"2023","unstructured":"Filippas, D., Nicopoulos, C., Dimitrakopoulos, G.: Streaming dilated convolution engine. IEEE Trans. Very Large Scale Integr. (VLSI) Syst. 31(3), 401\u2013405 (2023)","journal-title":"IEEE Trans. Very Large Scale Integr. (VLSI) Syst."},{"key":"166_CR6","doi-asserted-by":"publisher","unstructured":"Georganas, E., Avancha, S., Banerjee, K., Kalamkar, D., Henry, G., Pabst, H., Heinecke, A.: Anatomy of high-performance deep learning convolutions on SIMD architectures. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 830\u2013841. IEEE (2018). https:\/\/doi.org\/10.1109\/SC.2018.00069","DOI":"10.1109\/SC.2018.00069"},{"key":"166_CR7","doi-asserted-by":"crossref","unstructured":"Hao, R., Wang, Q., Yin, S., Zhou, T., Shen, S., Mei, S., Liu, J.: Towards effective depthwise convolutions on armv8 architecture. arXiv preprint arXiv:2206.12124 (2022)","DOI":"10.1007\/978-3-031-29927-8_34"},{"key":"166_CR8","unstructured":"Heinecke, A., Georganas, E., Banerjee, K., Kalamkar, D., Sundaram, N., Venkat, A., Henry, G., Pabst, H.: Understanding the performance of small convolution operations for CNN on intel architecture. In: Poster in the International Conference for High Performance Computing, Networking, Storage, and Analysis (2017)"},{"key":"166_CR9","doi-asserted-by":"crossref","unstructured":"Igual, F.D., Ali, M., Friedmann, A., Stotzer, E., Wentz, T., Geijn, R.A.: Unleashing the high-performance and low-power of multi-core DSPS for general-purpose HPC. In: SC\u201912: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, pp. 1\u201311. IEEE (2012)","DOI":"10.1109\/SC.2012.109"},{"key":"166_CR10","unstructured":"Intel Corporation: oneAPI deep neural network library (oneDNN). https:\/\/github.com\/oneapi-src\/oneDNN. Online, accessed 3-Jan-2023 (2023)"},{"key":"166_CR11","doi-asserted-by":"crossref","unstructured":"Kim, M., Park, C., Kim, S., Hong, T., Ro, W.W.: Efficient dilated-Winograd convolutional neural networks. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 2711\u20132715. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8803277"},{"key":"166_CR12","doi-asserted-by":"crossref","unstructured":"Kurth, T., Treichler, S., Romero, J., Mudigonda, M., Luehr, N., Phillips, E., Mahesh, A., Matheson, M., Deslippe, J., Fatica, M., et al.: Exascale deep learning for climate analytics. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 649\u2013660. IEEE (2018)","DOI":"10.1109\/SC.2018.00054"},{"key":"166_CR13","doi-asserted-by":"publisher","first-page":"1219","DOI":"10.1016\/j.neucom.2017.09.062","volume":"275","author":"G Lin","year":"2018","unstructured":"Lin, G., Wu, Q., Qiu, L., Huang, X.: Image super-resolution using a dilated convolutional neural network. Neurocomputing 275, 1219\u20131230 (2018)","journal-title":"Neurocomputing"},{"issue":"1","key":"166_CR14","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1109\/TPDS.2021.3084813","volume":"33","author":"G Lu","year":"2021","unstructured":"Lu, G., Zhang, W., Wang, Z.: Optimizing depthwise separable convolution operations on GPUs. IEEE Trans. Parallel Distrib. Syst. 33(1), 70\u201387 (2021)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"166_CR15","doi-asserted-by":"crossref","unstructured":"Mehta, S., Rastegari, M., Caspi, A., Shapiro, L., Hajishirzi, H.: ESPNet: efficient spatial pyramid of dilated convolutions for semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 552\u2013568 (2018)","DOI":"10.1007\/978-3-030-01249-6_34"},{"key":"166_CR16","doi-asserted-by":"crossref","unstructured":"Mogers, N., Radu, V., Li, L., Turner, J., O\u2019Boyle, M., Dubach, C.: Automatic generation of specialized direct convolutions for mobile gpus. In: Proceedings of the 13th Annual Workshop on General Purpose Processing Using Graphics Processing Unit, pp. 41\u201350 (2020)","DOI":"10.1145\/3366428.3380771"},{"key":"166_CR17","unstructured":"Paszke, A., Gross, S., Massa, F., et al.: PyTorch: an imperative style, high-performance deep learning library (2019)"},{"issue":"1","key":"166_CR18","first-page":"57","volume":"45","author":"X Pei","year":"2023","unstructured":"Pei, X., Wang, Q., Liao, L., Li, R., Mei, S., Liu, J., Pang, Z.: Optimizing parallel matrix transpose algorithm on multi-core digital signal processors (in Chinese). J. Natl. Univ. Def. Technol. 45(1), 57\u201366 (2023)","journal-title":"J. Natl. Univ. Def. Technol."},{"key":"166_CR19","doi-asserted-by":"publisher","first-page":"78","DOI":"10.3390\/electronics12010078","volume":"12","author":"I Safonov","year":"2022","unstructured":"Safonov, I., Kornilov, A., Makienko, D.: An approach for matrix multiplication of 32-bit fixed point numbers by means of 16-bit SIMD instructions on DSP. Electronics 12, 78 (2022)","journal-title":"Electronics"},{"issue":"1","key":"166_CR20","first-page":"86","volume":"45","author":"Q Wang","year":"2023","unstructured":"Wang, Q., Pei, X., Liao, L., Wang, H., Li, R., Mei, S., Li, D.: Evaluating matrix multiplication-based convolution algorithm on multi-core digital signal processors (in Chinese). J. Natl. Univ. Def. Technol. 45(1), 86\u201394 (2023)","journal-title":"J. Natl. Univ. Def. Technol."},{"key":"166_CR21","doi-asserted-by":"crossref","unstructured":"Yin, S., Wang, Q., Hao, R., Zhou, T., Mei, S., Liu, J.: Optimizing irregular-shaped matrix-matrix multiplication on multi-core DSPS. In: 2022 IEEE International Conference on Cluster Computing (CLUSTER), pp. 451\u2013461 (2022)","DOI":"10.1109\/CLUSTER51413.2022.00055"},{"key":"166_CR23","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)"},{"key":"166_CR22","doi-asserted-by":"crossref","unstructured":"Yu, F., Koltun, V., Funkhouser, T.: Dilated residual networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 472\u2013480 (2017)","DOI":"10.1109\/CVPR.2017.75"},{"key":"166_CR24","doi-asserted-by":"crossref","unstructured":"Zhang, K., Zuo, W., Gu, S., Zhang, L.: Learning deep CNN denoiser prior for image restoration. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3929\u20133938 (2017)","DOI":"10.1109\/CVPR.2017.300"},{"key":"166_CR25","unstructured":"Zhang, J., Franchetti, F., Low, T.M.: High performance zero-memory overhead direct convolutions. In: International Conference on Machine Learning, pp. 5771\u20135780 (2018)"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-023-00166-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-023-00166-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-023-00166-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,9]],"date-time":"2024-05-09T09:07:48Z","timestamp":1715245668000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-023-00166-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,9]]},"references-count":25,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,2]]}},"alternative-id":["166"],"URL":"https:\/\/doi.org\/10.1007\/s42514-023-00166-8","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"type":"print","value":"2524-4922"},{"type":"electronic","value":"2524-4930"}],"subject":[],"published":{"date-parts":[[2023,9,9]]},"assertion":[{"value":"8 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 August 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 September 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}