{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,22]],"date-time":"2026-03-22T05:34:54Z","timestamp":1774157694069,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,27]],"date-time":"2019-10-27T00:00:00Z","timestamp":1572134400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["FA84750-14-2-0006"],"award-info":[{"award-number":["FA84750-14-2-0006"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["17-SC-20- SC"],"award-info":[{"award-number":["17-SC-20- SC"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1409813,CNS-1651570"],"award-info":[{"award-number":["CCF-1409813,CNS-1651570"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006168","name":"National Nuclear Security Administration","doi-asserted-by":"publisher","award":["17-SC-20- SC"],"award-info":[{"award-number":["17-SC-20- SC"]}],"id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,27]]},"DOI":"10.1145\/3341301.3359630","type":"proceedings-article","created":{"date-parts":[[2019,10,21]],"date-time":"2019-10-21T13:34:22Z","timestamp":1571664862000},"page":"47-62","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":208,"title":["TASO"],"prefix":"10.1145","author":[{"given":"Zhihao","family":"Jia","sequence":"first","affiliation":[{"name":"Stanford University"}]},{"given":"Oded","family":"Padon","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"James","family":"Thomas","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"Todd","family":"Warszawski","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"Matei","family":"Zaharia","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"Alex","family":"Aiken","sequence":"additional","affiliation":[{"name":"Stanford University"}]}],"member":"320","published-online":{"date-parts":[[2019,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2017. Amazon EC2 P3 Instances. https:\/\/aws.amazon.com\/ec2\/instance-types\/p3\/.  2017. Amazon EC2 P3 Instances. https:\/\/aws.amazon.com\/ec2\/instance-types\/p3\/."},{"key":"e_1_3_2_1_2_1","unstructured":"2017. Tensorflow graph transform creates corrupted graph. https:\/\/github.com\/tensorflow\/tensorflow\/issues\/7523.  2017. Tensorflow graph transform creates corrupted graph. https:\/\/github.com\/tensorflow\/tensorflow\/issues\/7523."},{"key":"e_1_3_2_1_3_1","unstructured":"2017. XLA: Optimizing Compiler for TensorFlow. https:\/\/www.tensorflow.org\/xla.  2017. XLA: Optimizing Compiler for TensorFlow. https:\/\/www.tensorflow.org\/xla."},{"key":"e_1_3_2_1_4_1","unstructured":"2018. Graph transform: fold constant with invalid graph. https:\/\/github.com\/tensorflow\/tensorflow\/issues\/16545.  2018. Graph transform: fold constant with invalid graph. https:\/\/github.com\/tensorflow\/tensorflow\/issues\/16545."},{"key":"e_1_3_2_1_5_1","unstructured":"2018. Tensor Cores in NVIDIA Volta Architecture. https:\/\/www.nvidia.com\/en-us\/data-center\/tensorcore\/.  2018. Tensor Cores in NVIDIA Volta Architecture. https:\/\/www.nvidia.com\/en-us\/data-center\/tensorcore\/."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation (OSDI).","author":"Abadi Mart\u00edn","year":"2016"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1168857.1168906"},{"key":"e_1_3_2_1_8_1","volume-title":"TVM: End-to-End Optimization Stack for Deep Learning. CoRR abs\/1802.04799","author":"Chen Tianqi","year":"2018"},{"key":"e_1_3_2_1_9_1","unstructured":"Tianqi Chen Lianmin Zheng Eddie Yan Ziheng Jiang Thierry Moreau Luis Ceze Carlos Guestrin and Arvind Krishnamurthy. 2018. Learning to Optimize Tensor Programs. In Advances in Neural Information Processing Systems 31.  Tianqi Chen Lianmin Zheng Eddie Yan Ziheng Jiang Thierry Moreau Luis Ceze Carlos Guestrin and Arvind Krishnamurthy. 2018. Learning to Optimize Tensor Programs. In Advances in Neural Information Processing Systems 31."},{"key":"e_1_3_2_1_10_1","unstructured":"Sharan Chetlur Cliff Woolley Philippe Vandermersch Jonathan Cohen John Tran Bryan Catanzaro and Evan Shelhamer. 2014. cuDNN: Efficient Primitives for Deep Learning. CoRR abs\/1410.0759 (2014). http:\/\/arxiv.org\/abs\/1410.0759  Sharan Chetlur Cliff Woolley Philippe Vandermersch Jonathan Cohen John Tran Bryan Catanzaro and Evan Shelhamer. 2014. cuDNN: Efficient Primitives for Deep Learning. CoRR abs\/1410.0759 (2014). http:\/\/arxiv.org\/abs\/1410.0759"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3314221.3314596"},{"key":"e_1_3_2_1_12_1","unstructured":"cuBLAS 2016. Dense Linear Algebra on GPUs. https:\/\/developer.nvidia.com\/cublas.  cuBLAS 2016. Dense Linear Algebra on GPUs. https:\/\/developer.nvidia.com\/cublas."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Manjeet Dahiya and Sorav Bansal. 2017. Black-Box Equivalence Checking Across Compiler Optimizations. In Programming Languages and Systems Bor-Yuh Evan Chang (Ed.). Springer International Publishing Cham.  Manjeet Dahiya and Sorav Bansal. 2017. Black-Box Equivalence Checking Across Compiler Optimizations. In Programming Languages and Systems Bor-Yuh Evan Chang (Ed.). Springer International Publishing Cham.","DOI":"10.1007\/978-3-319-71237-6_7"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/1792734.1792766"},{"key":"e_1_3_2_1_15_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR abs\/1810.04805","author":"Devlin Jacob","year":"2018"},{"key":"e_1_3_2_1_16_1","unstructured":"Vincent Dumoulin and Francesco Visin. 2016. A guide to convolution arithmetic for deep learning. CoRR (2016).  Vincent Dumoulin and Francesco Visin. 2016. A guide to convolution arithmetic for deep learning. CoRR (2016)."},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the 30th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages (POPL '03)","author":"Gulwani Sumit"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","unstructured":"Andrew G. Howard Menglong Zhu Bo Chen Dmitry Kalenichenko Weijun Wang Tobias Weyand Marco Andreetto and Hartwig Adam. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. CoRR abs\/1704.04861 (2017).  Andrew G. Howard Menglong Zhu Bo Chen Dmitry Kalenichenko Weijun Wang Tobias Weyand Marco Andreetto and Hartwig Adam. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. CoRR abs\/1704.04861 (2017)."},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning (Proceedings of Machine Learning Research)","volume":"80","author":"Jia Zhihao","year":"2018"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 2nd Conference on Systems and Machine Learning (SysML'19)","author":"Jia Zhihao","year":"2019"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 2nd Conference on Systems and Machine Learning (SysML'19)","author":"Jia Zhihao","year":"2019"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2594291.2594334"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/3014904.3014977"},{"key":"e_1_3_2_1_25_1","volume-title":"ACM SIGARCH Computer Architecture News","volume":"15","author":"Massalin Henry","year":"1987"},{"key":"e_1_3_2_1_26_1","unstructured":"Azalia Mirhoseini Hieu Pham Quoc V Le Benoit Steiner Rasmus Larsen Yuefeng Zhou Naveen Kumar Mohammad Norouzi Samy Bengio and Jeff Dean. 2017. Device Placement Optimization with Reinforcement Learning. (2017).  Azalia Mirhoseini Hieu Pham Quoc V Le Benoit Steiner Rasmus Larsen Yuefeng Zhou Naveen Kumar Mohammad Norouzi Samy Bengio and Jeff Dean. 2017. Device Placement Optimization with Reinforcement Learning. (2017)."},{"key":"e_1_3_2_1_27_1","unstructured":"MKLDNN 2016. Intel Math Kernel Library for Deep Neural Networks. https:\/\/01.org\/mkl-dnn.  MKLDNN 2016. Intel Math Kernel Library for Deep Neural Networks. https:\/\/01.org\/mkl-dnn."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 27th International Conference on International Conference on Machine Learning (ICML'10)","author":"Nair Vinod"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/349299.349314"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0054170"},{"key":"e_1_3_2_1_31_1","unstructured":"PyTorch 2017. Tensors and Dynamic neural networks in Python with strong GPU acceleration. https:\/\/pytorch.org.  PyTorch 2017. Tensors and Dynamic neural networks in Python with strong GPU acceleration. https:\/\/pytorch.org."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2509136.2509509"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304072"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.2168\/LMCS-7(1:10)2011"},{"key":"e_1_3_2_1_36_1","unstructured":"TensorRT 2017. NVIDIA TensorRT: Programmable Inference Accelerator. https:\/\/developer.nvidia.com\/tensorrt.  TensorRT 2017. NVIDIA TensorRT: Programmable Inference Accelerator. https:\/\/developer.nvidia.com\/tensorrt."},{"key":"e_1_3_2_1_37_1","volume-title":"Training and Inference with Integers in Deep Neural Networks. In International Conference on Learning Representations.","author":"Wu Shuang","year":"2018"},{"key":"e_1_3_2_1_38_1","unstructured":"Saining Xie Ross B. Girshick Piotr Doll\u00e1r Zhuowen Tu and Kaiming He. 2016. Aggregated Residual Transformations for Deep Neural Networks. CoRR abs\/1611.05431 (2016).  Saining Xie Ross B. Girshick Piotr Doll\u00e1r Zhuowen Tu and Kaiming He. 2016. Aggregated Residual Transformations for Deep Neural Networks. CoRR abs\/1611.05431 (2016)."},{"key":"e_1_3_2_1_39_1","unstructured":"Barret Zoph and Quoc V. Le. 2016. Neural Architecture Search with Reinforcement Learning. CoRR abs\/1611.01578 (2016).  Barret Zoph and Quoc V. Le. 2016. Neural Architecture Search with Reinforcement Learning. CoRR abs\/1611.01578 (2016)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00907"}],"event":{"name":"SOSP '19: ACM SIGOPS 27th Symposium on Operating Systems Principles","location":"Huntsville Ontario Canada","acronym":"SOSP '19","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","USENIX Assoc USENIX Assoc"]},"container-title":["Proceedings of the 27th ACM Symposium on Operating Systems Principles"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3341301.3359630","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3341301.3359630","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3341301.3359630","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:23:26Z","timestamp":1750202606000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3341301.3359630"}},"subtitle":["optimizing deep learning computation with automatic generation of graph substitutions"],"short-title":[],"issued":{"date-parts":[[2019,10,27]]},"references-count":40,"alternative-id":["10.1145\/3341301.3359630","10.1145\/3341301"],"URL":"https:\/\/doi.org\/10.1145\/3341301.3359630","relation":{},"subject":[],"published":{"date-parts":[[2019,10,27]]},"assertion":[{"value":"2019-10-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}