{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T20:59:52Z","timestamp":1768424392774,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,8,5]],"date-time":"2019-08-05T00:00:00Z","timestamp":1564963200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,8,5]]},"DOI":"10.1145\/3337821.3337883","type":"proceedings-article","created":{"date-parts":[[2019,7,25]],"date-time":"2019-07-25T12:34:36Z","timestamp":1564058076000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["swATOP"],"prefix":"10.1145","author":[{"given":"Wei","family":"Gao","sequence":"first","affiliation":[{"name":"Tsinghua University"}]},{"given":"Jiarui","family":"Fang","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Wenlai","family":"Zhao","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Jinzhe","family":"Yang","sequence":"additional","affiliation":[{"name":"Imperial College London"}]},{"given":"Long","family":"Wang","sequence":"additional","affiliation":[{"name":"System Department of Baidu"}]},{"given":"Lin","family":"Gan","sequence":"additional","affiliation":[{"name":"Tsinghua University, National Supercomputing Center in Wuxi"}]},{"given":"Haohuan","family":"Fu","sequence":"additional","affiliation":[{"name":"Tsinghua University, National Supercomputing Center in Wuxi"}]},{"given":"Guangwen","family":"Yang","sequence":"additional","affiliation":[{"name":"Tsinghua University National Supercomputing Center in Wuxi"}]}],"member":"320","published-online":{"date-parts":[[2019,8,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.17"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1854273.1854317"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1379022.1375595"},{"key":"e_1_3_2_1_4_1","volume-title":"Tenth International Workshop on Frontiers in Handwriting Recognition. Suvisoft.","author":"Chellapilla Kumar","year":"2006","unstructured":"Kumar Chellapilla , Sidd Puri , and Patrice Simard . 2006 . High performance convolutional neural networks for document processing . In Tenth International Workshop on Frontiers in Handwriting Recognition. Suvisoft. Kumar Chellapilla, Sidd Puri, and Patrice Simard. 2006. High performance convolutional neural networks for document processing. In Tenth International Workshop on Frontiers in Handwriting Recognition. Suvisoft."},{"key":"e_1_3_2_1_5_1","unstructured":"Tianqi Chen Thierry Moreau etal 2018. {TVM}: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18). 578--594.   Tianqi Chen Thierry Moreau et al. 2018. {TVM}: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18). 578--594."},{"key":"e_1_3_2_1_6_1","unstructured":"Sharan Chetlur Cliff Woolley Philippe Vandermersch etal 2014. cudnn: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759 (2014).  Sharan Chetlur Cliff Woolley Philippe Vandermersch et al. 2014. cudnn: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759 (2014)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2017.20"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2017.51"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/977395.977673"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.435"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2018.00087"},{"key":"e_1_3_2_1_13_1","volume-title":"Performance Analysis of GPU-Based Convolutional Neural Networks. In 2016 45th International Conference on Parallel Processing (ICPP). 67--76","author":"Li X.","unstructured":"X. Li , G. Zhang , H. H. Huang , Z. Wang , and W. Zheng . 2016 . Performance Analysis of GPU-Based Convolutional Neural Networks. In 2016 45th International Conference on Parallel Processing (ICPP). 67--76 . X. Li, G. Zhang, H. H. Huang, Z. Wang, and W. Zheng. 2016. Performance Analysis of GPU-Based Convolutional Neural Networks. In 2016 45th International Conference on Parallel Processing (ICPP). 67--76."},{"key":"e_1_3_2_1_14_1","volume-title":"swTVM: Exploring the Automated Compilation for Deep Learning on Sunway Architecture. arXiv preprint arXiv","author":"Liu Changxi","year":"1904","unstructured":"Changxi Liu , Hailong Yang , Rujun Sun , Zhongzhi Luan , and Depei Qian . 2019. swTVM: Exploring the Automated Compilation for Deep Learning on Sunway Architecture. arXiv preprint arXiv . 1904 .07404 ( 2019). Changxi Liu, Hailong Yang, Rujun Sun, Zhongzhi Luan, and Depei Qian. 2019. swTVM: Exploring the Automated Compilation for Deep Learning on Sunway Architecture. arXiv preprint arXiv. 1904.07404 ( 2019)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840306"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499370.2462176"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_18_1","volume-title":"Glow: Graph lowering compiler techniques for neural networks. arXiv preprint arXiv:1805.00907.","author":"Rotem Nadav","year":"2018","unstructured":"Nadav Rotem , Jordan Fix , Saleem Abdulrasool , Garret Catron , Summer Deng , Roman Dzhabarov , Nick Gibson , James Hegeman , Meghan Lele , Roman Levenstein , 2018 . Glow: Graph lowering compiler techniques for neural networks. arXiv preprint arXiv:1805.00907. Nadav Rotem, Jordan Fix, Saleem Abdulrasool, Garret Catron, Summer Deng, Roman Dzhabarov, Nick Gibson, James Hegeman, Meghan Lele, Roman Levenstein, et al. 2018. Glow: Graph lowering compiler techniques for neural networks. arXiv preprint arXiv:1805.00907."},{"key":"e_1_3_2_1_19_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 ( 2014 ). Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_20_1","volume-title":"Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. arXiv preprint arXiv:1802.04730","author":"Vasilache Nicolas","year":"2018","unstructured":"Nicolas Vasilache , Oleksandr Zinenko , 2018 . Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. arXiv preprint arXiv:1802.04730 (2018). Nicolas Vasilache, Oleksandr Zinenko, et al. 2018. Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. arXiv preprint arXiv:1802.04730 (2018)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2400682.2400713"},{"key":"e_1_3_2_1_22_1","volume-title":"Supercomputing, 1998. SC98. IEEE\/ACM Conference on. IEEE, 38--38","author":"Clinton Whaley R","year":"1998","unstructured":"R Clinton Whaley and Jack J Dongarra . 1998 . Automatically tuned linear algebra software . In Supercomputing, 1998. SC98. IEEE\/ACM Conference on. IEEE, 38--38 . R Clinton Whaley and Jack J Dongarra. 1998. Automatically tuned linear algebra software. In Supercomputing, 1998. SC98. IEEE\/ACM Conference on. IEEE, 38--38."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2018.00086"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2017.9"}],"event":{"name":"ICPP 2019: 48th International Conference on Parallel Processing","location":"Kyoto Japan","acronym":"ICPP 2019","sponsor":["University of Tsukuba University of Tsukuba"]},"container-title":["Proceedings of the 48th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3337821.3337883","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3337821.3337883","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:25:41Z","timestamp":1750206341000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3337821.3337883"}},"subtitle":["Automatically Optimizing Deep Learning Operators on SW26010 Many-Core Processor"],"short-title":[],"issued":{"date-parts":[[2019,8,5]]},"references-count":24,"alternative-id":["10.1145\/3337821.3337883","10.1145\/3337821"],"URL":"https:\/\/doi.org\/10.1145\/3337821.3337883","relation":{},"subject":[],"published":{"date-parts":[[2019,8,5]]},"assertion":[{"value":"2019-08-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}