{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:15:43Z","timestamp":1750220143161,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,11,1]],"date-time":"2021-11-01T00:00:00Z","timestamp":1635724800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,11]]},"DOI":"10.1145\/3493229.3493305","type":"proceedings-article","created":{"date-parts":[[2021,11,13]],"date-time":"2021-11-13T11:15:52Z","timestamp":1636802152000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Efficient Application of Tensor Core Units for Convolving Images"],"prefix":"10.1145","author":[{"given":"Stefan","family":"Groth","sequence":"first","affiliation":[{"name":"Hardware\/Software Co-Design, Department of Computer Science Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU) Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J\u00fcrgen","family":"Teich","sequence":"additional","affiliation":[{"name":"Hardware\/Software Co-Design, Department of Computer Science Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU) Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Frank","family":"Hannig","sequence":"additional","affiliation":[{"name":"Hardware\/Software Co-Design, Department of Computer Science Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU) Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,11,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"NVIDIA Tesla V100 GPU Architecture","author":"NVIDIA Corporation","year":"2017","unstructured":"NVIDIA Corporation . NVIDIA Tesla V100 GPU Architecture , The World's Most Advanced Data Center GPU. 2017 . url: https:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf. NVIDIA Corporation. NVIDIA Tesla V100 GPU Architecture, The World's Most Advanced Data Center GPU. 2017. url: https:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf."},{"key":"e_1_3_2_1_2_1","unstructured":"NVIDIA Corporation. CUDA C++ Programming Guide. Version PG-02829-001_v11.4. Sept. 2021. url: https:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_C_Programming_Guide.pdf. NVIDIA Corporation. CUDA C++ Programming Guide. Version PG-02829-001_v11.4. Sept. 2021. url: https:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_C_Programming_Guide.pdf."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84882-935-0","volume-title":"Computer Vision - Algorithms and Applications. Texts in Computer Science","author":"Szeliski R.","year":"2011","unstructured":"R. Szeliski . Computer Vision - Algorithms and Applications. Texts in Computer Science . Springer , 2011 . isbn: 978-1-84882-934-3. R. Szeliski. Computer Vision - Algorithms and Applications. Texts in Computer Science. Springer, 2011. isbn: 978-1-84882-934-3."},{"key":"e_1_3_2_1_4_1","first-page":"1410","article-title":"\"cuDNN: Efficient Primitives for Deep Learning","author":"Chetlur S.","year":"2014","unstructured":"S. Chetlur , C. Woolley , P. Vandermersch , J. Cohen , J. Tran , B. Catanzaro , and E. Shelhamer . \"cuDNN: Efficient Primitives for Deep Learning \". In: The Computing Research Repository (CoRR) ( 2014 ). arXiv: 1410 .0759. S. Chetlur, C. Woolley, P. Vandermersch, J. Cohen, J. Tran, B. Catanzaro, and E. Shelhamer. \"cuDNN: Efficient Primitives for Deep Learning\". In: The Computing Research Repository (CoRR) (2014). arXiv: 1410.0759.","journal-title":"The Computing Research Repository (CoRR) ("},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.2.23"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00371-014-0986-6"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/248979"},{"key":"e_1_3_2_1_9_1","first-page":"120","volume-title":"Proceedings of the Conference on Design, Automation and Test in Europe (DATE). IEEE","author":"Wu H.-N.","year":"2019","unstructured":"H.-N. Wu and C.-T. Huang . \" Data Locality Optimization of Depthwise Separable Convolutions for CNN Inference Accelerators\". In: Proceedings of the Conference on Design, Automation and Test in Europe (DATE). IEEE , Mar. 2019 , pp. 120 - 125 . doi: 10.23919\/DATE.2019.8715097. 10.23919\/DATE.2019.8715097 H.-N. Wu and C.-T. Huang. \"Data Locality Optimization of Depthwise Separable Convolutions for CNN Inference Accelerators\". In: Proceedings of the Conference on Design, Automation and Test in Europe (DATE). IEEE, Mar. 2019, pp. 120-125. doi: 10.23919\/DATE.2019.8715097."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2394802"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2012.59"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3011893"},{"key":"e_1_3_2_1_13_1","first-page":"88","article-title":"Accelerating Sparse Matrix-Matrix Multiplication with GPU Tensor Cores","author":"Zachariadis O.","year":"2020","unstructured":"O. Zachariadis , N. Satpute , J. G\u00f3mez-Luna , and J. Olivares . \" Accelerating Sparse Matrix-Matrix Multiplication with GPU Tensor Cores \". In: Comput. Electr.Eng. 88 ( 2020 ). doi: 10.1016\/j.compeleceng.2020.106848. 10.1016\/j.compeleceng.2020.106848 O. Zachariadis, N. Satpute, J. G\u00f3mez-Luna, and J. Olivares. \"Accelerating Sparse Matrix-Matrix Multiplication with GPU Tensor Cores\". In: Comput. Electr.Eng. 88 (2020). doi: 10.1016\/j.compeleceng.2020.106848.","journal-title":"Comput. Electr.Eng."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00059"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441623"},{"key":"e_1_3_2_1_16_1","first-page":"249","volume-title":"Proceedings of the 40th Annual International Conference on the Theory and Applications of Cryptographic Techniques (EUROCRYPT), Part II.","volume":"12697","author":"Ducas L.","year":"2021","unstructured":"L. Ducas , M. Stevens , and W. P. J. van Woerden . \" Advanced Lattice Sieving on GP Us , with Tensor Cores\". In: Proceedings of the 40th Annual International Conference on the Theory and Applications of Cryptographic Techniques (EUROCRYPT), Part II. Vol. 12697 . Lecture Notes in Computer Science (LNCS). Springer , 2021 , pp. 249 - 279 . doi: 10.1007\/978-3-030-77886-6_9. 10.1007\/978-3-030-77886-6_9 L. Ducas, M. Stevens, and W. P. J. van Woerden. \"Advanced Lattice Sieving on GPUs, with Tensor Cores\". In: Proceedings of the 40th Annual International Conference on the Theory and Applications of Cryptographic Techniques (EUROCRYPT), Part II. Vol. 12697. Lecture Notes in Computer Science (LNCS). Springer, 2021, pp. 249-279. doi: 10.1007\/978-3-030-77886-6_9."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3378678.3391880"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3140582.3081039"}],"event":{"name":"SCOPES '21: 24th International Workshop on Software and Compilers for Embedded Systems","sponsor":["EDAA European Design Automation Association","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"Eindhoven Netherlands","acronym":"SCOPES '21"},"container-title":["Proceedings of the 24th International Workshop on Software and Compilers for Embedded Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3493229.3493305","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3493229.3493305","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:59:57Z","timestamp":1750186797000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3493229.3493305"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,11]]},"references-count":18,"alternative-id":["10.1145\/3493229.3493305","10.1145\/3493229"],"URL":"https:\/\/doi.org\/10.1145\/3493229.3493305","relation":{},"subject":[],"published":{"date-parts":[[2021,11]]},"assertion":[{"value":"2021-11-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}