{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T11:27:08Z","timestamp":1777030028008,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,3,9]],"date-time":"2020-03-09T00:00:00Z","timestamp":1583712000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012659","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61572048"],"award-info":[{"award-number":["61572048"]}],"id":[{"id":"10.13039\/501100012659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Natural Science Foundation of Beijing Municipality","doi-asserted-by":"publisher","award":["JQ19014"],"award-info":[{"award-number":["JQ19014"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,3,9]]},"DOI":"10.1145\/3373376.3378508","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T22:37:01Z","timestamp":1584139021000},"page":"859-873","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":149,"title":["FlexTensor"],"prefix":"10.1145","author":[{"given":"Size","family":"Zheng","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Yun","family":"Liang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Shuo","family":"Wang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Renze","family":"Chen","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Kaiwen","family":"Sheng","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2020,3,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"TensorFlow: A System for Large-Scale Machine Learning. In 12th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2016","author":"Abadi Mart\u00edn","year":"2016"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3322967"},{"key":"e_1_3_2_1_3_1","volume-title":"Deep Learning using Rectified Linear Units (ReLU). CoRR abs\/1803.08375","author":"Agarap Abien Fred","year":"2018"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the ACM\/IEEE Conference on High Performance Computing, SC 2009","author":"Belter Geoffrey","year":"2009"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375595"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"e_1_3_2_1_7_1","volume-title":"MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. CoRR abs\/1512.01274","author":"Chen Tianqi","year":"2015"},{"key":"e_1_3_2_1_8_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2018","author":"Chen Tianqi","year":"2018"},{"key":"e_1_3_2_1_9_1","volume-title":"Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018","author":"Chen Tianqi","year":"2018"},{"key":"e_1_3_2_1_10_1","volume-title":"An Exploration of Parameter Redundancy in Deep Networks with Circulant Projections. In 2015 IEEE International Conference on Computer Vision, ICCV 2015","author":"Cheng Yu","year":"2015"},{"key":"e_1_3_2_1_11_1","volume-title":"cuDNN: Efficient Primitives for Deep Learning. CoRR abs\/1410.0759","author":"Chetlur Sharan","year":"2014"},{"key":"e_1_3_2_1_12_1","volume-title":"Xception: Deep Learning with Depthwise Separable Convolutions. CoRR abs\/1610.02357","author":"Chollet Fran\u00e7ois","year":"2016"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2011.2110592"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Leonardo Dagum and Ramesh Menon. 1988. OpenMP: An Industry- Standard API for Shared-Memory Programming. IEEE Computational Science & Engineering 5 Issue 1 (1988).  Leonardo Dagum and Ramesh Menon. 1988. OpenMP: An Industry- Standard API for Shared-Memory Programming. IEEE Computational Science & Engineering 5 Issue 1 (1988).","DOI":"10.1109\/99.660313"},{"key":"e_1_3_2_1_15_1","volume-title":"Leo: A Profile-Driven Dynamic Optimization Framework for GPU Applications. In 2014 Conference on Timely Results in Operating Systems, TRIOS '14","author":"Farooqui Naila","year":"2014"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 1998 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP '98","author":"Frigo Matteo","year":"1998"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00069"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.373"},{"key":"e_1_3_2_1_19_1","unstructured":"Intel(R) PlaidML https:\/\/ai.intel.com\/plaidml. [n.d.]. https:\/\/ai.intel. com\/plaidml  Intel(R) PlaidML https:\/\/ai.intel.com\/plaidml. [n.d.]. https:\/\/ai.intel. com\/plaidml"},{"key":"e_1_3_2_1_20_1","unstructured":"Inter(R) MKL-DNN https:\/\/github.com\/intel\/mkl dnn. [n.d.]. https: \/\/github.com\/intel\/mkl-dnn  Inter(R) MKL-DNN https:\/\/github.com\/intel\/mkl dnn. [n.d.]. https: \/\/github.com\/intel\/mkl-dnn"},{"key":"e_1_3_2_1_21_1","unstructured":"Torch\/CUNN https:\/\/github.com\/torch\/cunn. [n.d.]. https:\/\/github. com\/torch\/cunn  Torch\/CUNN https:\/\/github.com\/torch\/cunn. [n.d.]. https:\/\/github. com\/torch\/cunn"},{"key":"e_1_3_2_1_22_1","unstructured":"Intel(R) MKL https:\/\/software.intel.com\/en us\/mkl. [n.d.]. https: \/\/software.intel.com\/en-us\/mkl  Intel(R) MKL https:\/\/software.intel.com\/en us\/mkl. [n.d.]. https: \/\/software.intel.com\/en-us\/mkl"},{"key":"e_1_3_2_1_23_1","volume-title":"Caffe: Convolutional Architecture for Fast Feature Embedding. arXiv preprint arXiv:1408.5093","author":"Jia Yangqing","year":"2014"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the ACM\/IEEE Conference on High Performance Networking and Computing, SC 2007","author":"Kim DaeGon","year":"2007"},{"key":"e_1_3_2_1_25_1","volume-title":"Convolutional Neural Networks for Sentence Classification. CoRR abs\/1408.5882","author":"Kim Yoon","year":"2014"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.220.4598.671"},{"key":"e_1_3_2_1_27_1","volume-title":"Amarasinghe","author":"Kjolstad Fredrik","year":"2017"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293910"},{"key":"e_1_3_2_1_29_1","volume-title":"Fast Algorithms for Convolutional Neural Networks. CoRR abs\/1509.09308","author":"Lavin Andrew","year":"2015"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.4.541"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3293883.3295734"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2018.2840686"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196120"},{"key":"e_1_3_2_1_34_1","volume-title":"Fast Training of Convolutional Networks through FFTs. CoRR abs\/1312.5851","author":"Mathieu Micha\u00ebl","year":"2013"},{"key":"e_1_3_2_1_35_1","volume-title":"Johannes de Fine Licht, and Torsten Hoefler","author":"Matteis Tiziano De","year":"2019"},{"key":"e_1_3_2_1_36_1","volume-title":"Human-level control through deep reinforcement learning. Nature 518, 7540","author":"Mnih Volodymyr","year":"2015"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925952"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694364"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2017.2732685"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/1365490.1365500"},{"key":"e_1_3_2_1_41_1","unstructured":"NVIDIA(R). [n.d.]. CUBLAS Library https:\/\/www.nvidia.com\/. https:\/\/developer.download.nvidia.cn\/compute\/DevZone\/docs\/html\/ CUDALibraries\/doc\/CUBLAS_Library.pdf  NVIDIA(R). [n.d.]. CUBLAS Library https:\/\/www.nvidia.com\/. https:\/\/developer.download.nvidia.cn\/compute\/DevZone\/docs\/html\/ CUDALibraries\/doc\/CUBLAS_Library.pdf"},{"key":"e_1_3_2_1_42_1","volume-title":"Sidiropoulos","author":"Papalexakis Evangelos E.","year":"2017"},{"key":"e_1_3_2_1_43_1","volume-title":"High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019","author":"Paszke Adam","year":"2019"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"e_1_3_2_1_45_1","volume-title":"Real-Time Object Detection. In 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016","author":"Redmon Joseph","year":"2016"},{"key":"e_1_3_2_1_46_1","volume-title":"Relay: A High-Level IR for Deep Learning. CoRR abs\/1904.08368","author":"Roesch Jared","year":"2019"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_48_1","volume-title":"2nd International Conference on Learning Representations, ICLR 2014, Banff, AB, Canada, April 14--16, 2014, Conference Track Proceedings. http: \/\/arxiv.org\/abs\/1312","author":"Sermanet Pierre","year":"2014"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2014.6853196"},{"key":"e_1_3_2_1_50_1","volume-title":"Barto","author":"Sutton Richard S.","year":"1998"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_52_1","volume-title":"3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings. http:\/\/arxiv.org\/abs\/1412","author":"Vasilache Nicolas","year":"2015"},{"key":"e_1_3_2_1_53_1","volume-title":"Tensor Comprehensions: Framework- Agnostic High-Performance Machine Learning Abstractions. CoRR abs\/1802.04730","author":"Vasilache Nicolas","year":"2018"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342019866247"},{"key":"e_1_3_2_1_55_1","volume-title":"Albert Cohen, Jos\u00e9 Ignacio G\u00f3mez, Christian Tenllado, and Francky Catthoor.","author":"Verdoolaege Sven","year":"2013"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3174243.3174253"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"e_1_3_2_1_58_1","volume-title":"Encyclopedia of Parallel Computing. 95--101. https: \/\/doi.org\/10.1007\/978-0--387-09766--4_85","author":"Whaley R. Clint"},{"key":"e_1_3_2_1_59_1","volume-title":"2018 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2018","author":"Yue Xiangyu","year":"2018"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062244"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307681.3325407"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830813"},{"key":"e_1_3_2_1_63_1","volume-title":"Proceedings, Part XIV. 3--19","author":"Yan Zhaoyi","year":"2018"},{"key":"e_1_3_2_1_64_1","volume-title":"ADADELTA: An Adaptive Learning Rate Method. CoRR abs\/1212.5701","author":"Zeiler Matthew D.","year":"2012"},{"key":"e_1_3_2_1_65_1","volume-title":"Optimizing FPGA-based Accelerator Design for Deep Convolutional Neural Networks (FPGA '15)","author":"Zhang Chen","year":"2015"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021698"}],"event":{"name":"ASPLOS '20: Architectural Support for Programming Languages and Operating Systems","location":"Lausanne Switzerland","acronym":"ASPLOS '20","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378508","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3373376.3378508","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:38:16Z","timestamp":1750199896000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378508"}},"subtitle":["An Automatic Schedule Exploration and Optimization Framework for Tensor Computation on Heterogeneous System"],"short-title":[],"issued":{"date-parts":[[2020,3,9]]},"references-count":66,"alternative-id":["10.1145\/3373376.3378508","10.1145\/3373376"],"URL":"https:\/\/doi.org\/10.1145\/3373376.3378508","relation":{},"subject":[],"published":{"date-parts":[[2020,3,9]]},"assertion":[{"value":"2020-03-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}