{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T00:44:17Z","timestamp":1773708257915,"version":"3.50.1"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031213946","type":"print"},{"value":"9783031213953","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-21395-3_2","type":"book-chapter","created":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T14:34:17Z","timestamp":1670250857000},"page":"17-29","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Optimizing Winograd Convolution on GPUs via Partial Kernel Fusion"],"prefix":"10.1007","author":[{"given":"Gan","family":"Tong","sequence":"first","affiliation":[]},{"given":"Run","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Ling","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Mengqiao","family":"Lan","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yuanhu","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"Wentao","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Yashuai","family":"L\u00fc","sequence":"additional","affiliation":[]},{"given":"Sheng","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Libo","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,12,1]]},"reference":[{"key":"2_CR1","unstructured":"Patel, R., Patel, S.: A comprehensive study of applying convolutional neural network for computer vision. Int. J. Adv. Sci. Technol. 6, 2161\u20132174 (2020)"},{"key":"2_CR2","doi-asserted-by":"publisher","first-page":"2352","DOI":"10.1162\/neco_a_00990","volume":"29","author":"W Rawat","year":"2017","unstructured":"Rawat, W., Wang, Z.: Deep convolutional neural networks for image classification: a comprehensive review. Neural Comput. 29, 2352\u20132449 (2017)","journal-title":"Neural Comput."},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Fathi, E., Shoja, B.M.: Deep neural networks for natural language processing. In: Handbook of Statistics, vol. 38, pp. 229\u2013316 (2018)","DOI":"10.1016\/bs.host.2018.07.006"},{"key":"2_CR4","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. The MIT Press (2016)"},{"issue":"12","key":"2_CR5","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1109\/JPROC.2017.2761740","volume":"105","author":"V Sze","year":"2017","unstructured":"Sze, V., Chen, Y.H., Yang, T.J., Emer, J.S.: Efficient processing of deep neural networks: a tutorial and survey. Proc. IEEE 105(12), 2295\u20132329 (2017). https:\/\/doi.org\/10.1109\/JPROC.2017.2761740","journal-title":"Proc. IEEE"},{"issue":"7","key":"2_CR6","doi-asserted-by":"publisher","first-page":"986","DOI":"10.1109\/TC.2020.2973144","volume":"69","author":"L Jia","year":"2020","unstructured":"Jia, L., Liang, Y., Li, X., Lu, L., Yan, S.: Enabling efficient fast convolution algorithms on GPUs via MegaKernels. IEEE Trans. Comput. 69(7), 986\u2013997 (2020). https:\/\/doi.org\/10.1109\/TC.2020.2973144","journal-title":"IEEE Trans. Comput."},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Lavin, A., Gray, S.: Fast algorithms for convolutional neural networks. Arxiv, September 2015","DOI":"10.1109\/CVPR.2016.435"},{"key":"2_CR8","unstructured":"Li, S., Park, J., Tang, P.T.P.: Enabling sparse Winograd convolution by native pruning. arXiv e-prints arXiv:1702.08597, February 2017"},{"key":"2_CR9","unstructured":"Meng, L., Brothers, J.: Efficient Winograd convolution via integer arithmetic. arXiv e-prints arXiv:1901.01965, January 2019"},{"key":"2_CR10","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1007\/978-3-030-35166-3_22","volume-title":"AI*IA 2019 \u2013 Advances in Artificial Intelligence","author":"B Barabasz","year":"2019","unstructured":"Barabasz, B., Gregg, D.: Winograd convolution for DNNs: beyond linear polynomials. In: Alviano, M., Greco, G., Scarcello, F. (eds.) AI*IA 2019. LNCS (LNAI), vol. 11946, pp. 307\u2013320. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-35166-3_22"},{"key":"2_CR11","doi-asserted-by":"publisher","unstructured":"Yan, D., Wang, W., Chu, X.: Optimizing batched Winograd convolution on GPUs. In: Proceedings of the 25th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2020, pp. 32\u201344. Association for Computing Machinery, New York (2020). https:\/\/doi.org\/10.1145\/3332466.3374520","DOI":"10.1145\/3332466.3374520"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Huang, Y., Shen, J., Wang, Z., Wen, M., Zhang, C.: A high-efficiency FPGA-based accelerator for convolutional neural networks using Winograd algorithm. J. Phys. Conf. Ser. 1026, 012019, May 2018","DOI":"10.1088\/1742-6596\/1026\/1\/012019"},{"key":"2_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"609","DOI":"10.1007\/978-3-319-68612-7_69","volume-title":"Artificial Neural Networks and Machine Learning \u2013 ICANN 2017","author":"Z Wang","year":"2017","unstructured":"Wang, Z., Lan, Q., He, H., Zhang, C.: Winograd algorithm for 3D convolution neural networks. In: Lintas, A., Rovetta, S., Verschure, P.F.M.J., Villa, A.E.P. (eds.) ICANN 2017. LNCS, vol. 10614, pp. 609\u2013616. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-68612-7_69"},{"key":"2_CR14","doi-asserted-by":"publisher","unstructured":"Laine, S., Karras, T., Aila, T.: Megakernels considered harmful: wavefront path tracing on GPUs. In: Proceedings of the 5th High-Performance Graphics Conference, HPG 2013, pp. 137\u2013143. Association for Computing Machinery, New York (2013). https:\/\/doi.org\/10.1145\/2492045.2492060","DOI":"10.1145\/2492045.2492060"},{"key":"2_CR15","volume-title":"Optimizing Compilers for Modern Architectures: A Dependence-Based Approach","author":"K Kennedy","year":"2001","unstructured":"Kennedy, K., Allen, J.R.: Optimizing Compilers for Modern Architectures: A Dependence-Based Approach. Morgan Kaufmann Publishers Inc., San Francisco (2001)"},{"key":"2_CR16","unstructured":"Chen, T., Moreau, T., Jiang, Z., Zheng, L., Yan, E., et al.: TVM: an automated end-to-end optimizing compiler for deep learning. In: USENIX OSDI 2018, pp. 579-594. USENIX, USA (2018)"},{"key":"2_CR17","unstructured":"Chen, T., et al.: Learning to optimize tensor programs. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) NIPS 2018, vol. 31. Curran Associates, Inc. (2018)"}],"container-title":["Lecture Notes in Computer Science","Network and Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-21395-3_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T14:51:42Z","timestamp":1670251902000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-21395-3_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031213946","9783031213953"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-21395-3_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"1 December 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NPC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"IFIP International Conference on Network and Parallel Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jinan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"npc2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/npc2022.jlu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"89","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"8","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}