{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T17:05:56Z","timestamp":1770224756634,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,6,18]],"date-time":"2021-06-18T00:00:00Z","timestamp":1623974400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Institute for Information & communications Technology Promotion","award":["2018-0-00581"],"award-info":[{"award-number":["2018-0-00581"]}]},{"name":"National Research Foundation of Korea","award":["2016M3C4A7952587, 2019M3E4A1080386"],"award-info":[{"award-number":["2016M3C4A7952587, 2019M3E4A1080386"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,6,19]]},"DOI":"10.1145\/3453483.3454038","type":"proceedings-article","created":{"date-parts":[[2021,6,24]],"date-time":"2021-06-24T16:58:48Z","timestamp":1624553928000},"page":"190-205","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":25,"title":["DeepCuts: a deep learning optimization framework for versatile GPU workloads"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6750-6979","authenticated-orcid":false,"given":"Wookeun","family":"Jung","sequence":"first","affiliation":[{"name":"Seoul National University, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9897-2769","authenticated-orcid":false,"given":"Thanh Tuan","family":"Dao","sequence":"additional","affiliation":[{"name":"Seoul National University, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4638-8170","authenticated-orcid":false,"given":"Jaejin","family":"Lee","sequence":"additional","affiliation":[{"name":"Seoul National University, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,6,18]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Accelerating Inference In TF-TRT User Guide. https:\/\/docs.nvidia.com\/deeplearning\/frameworks\/tf-trt-user-guide\/index.html Accessed: 2020-04-14.  Accelerating Inference In TF-TRT User Guide. https:\/\/docs.nvidia.com\/deeplearning\/frameworks\/tf-trt-user-guide\/index.html Accessed: 2020-04-14."},{"key":"e_1_3_2_1_2_1","unstructured":"BERT Example using the TensorRT C++ API. https:\/\/github.com\/NVIDIA\/TensorRT\/tree\/release\/5.1\/demo\/BERT\/ Accessed: 2020-04-14.  BERT Example using the TensorRT C++ API. https:\/\/github.com\/NVIDIA\/TensorRT\/tree\/release\/5.1\/demo\/BERT\/ Accessed: 2020-04-14."},{"key":"e_1_3_2_1_3_1","unstructured":"BERT For TensorFlow. https:\/\/github.com\/NVIDIA\/DeepLearningExamples\/tree\/master\/TensorFlow\/LanguageModeling\/BERT Accessed: 2020-04-17.  BERT For TensorFlow. https:\/\/github.com\/NVIDIA\/DeepLearningExamples\/tree\/master\/TensorFlow\/LanguageModeling\/BERT Accessed: 2020-04-17."},{"key":"e_1_3_2_1_4_1","unstructured":"The CMU Audio Databases. http:\/\/www.speech.cs.cmu.edu\/databases\/an4\/index.html Accessed: 2019-4-15.  The CMU Audio Databases. http:\/\/www.speech.cs.cmu.edu\/databases\/an4\/index.html Accessed: 2019-4-15."},{"key":"e_1_3_2_1_5_1","unstructured":"Display Advertising Challenge. https:\/\/www.kaggle.com\/c\/criteo-display-ad-challenge\/overview Accessed: 2019-4-15.  Display Advertising Challenge. https:\/\/www.kaggle.com\/c\/criteo-display-ad-challenge\/overview Accessed: 2019-4-15."},{"key":"e_1_3_2_1_6_1","unstructured":"EMNLP 2017 SECOND CONFERENCE ON MACHINE TRANSLATION (WMT17). http:\/\/www.statmt.org\/wmt17\/translation-task.html Accessed: 2019-4-15.  EMNLP 2017 SECOND CONFERENCE ON MACHINE TRANSLATION (WMT17). http:\/\/www.statmt.org\/wmt17\/translation-task.html Accessed: 2019-4-15."},{"key":"e_1_3_2_1_7_1","unstructured":"TensorFlow implementation of DeepSpeech2. https:\/\/github.com\/yao-matrix\/deepSpeech2 Accessed: 2019-11-21.  TensorFlow implementation of DeepSpeech2. https:\/\/github.com\/yao-matrix\/deepSpeech2 Accessed: 2019-11-21."},{"key":"e_1_3_2_1_8_1","unstructured":"tf_cnn_benchmarks: High performance benchmarks. https:\/\/github.com\/tensorflow\/benchmarks\/tree\/master\/scripts\/tf_cnn_benchmarks Accessed: 2019-11-21.  tf_cnn_benchmarks: High performance benchmarks. https:\/\/github.com\/tensorflow\/benchmarks\/tree\/master\/scripts\/tf_cnn_benchmarks Accessed: 2019-11-21."},{"key":"e_1_3_2_1_9_1","volume-title":"12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi , Paul Barham , Jianmin Chen , Zhifeng Chen , Andy Davis , Jeffrey Dean , Matthieu Devin , Sanjay Ghemawat , Geoffrey Irving , and Michael Isard . 2016 . Tensorflow: A system for large-scale machine learning . In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16) . 265\u2013283. https:\/\/doi.org\/10.5555\/3026877.3026899 Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, and Michael Isard. 2016. Tensorflow: A system for large-scale machine learning. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16). 265\u2013283. https:\/\/doi.org\/10.5555\/3026877.3026899"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783725"},{"key":"e_1_3_2_1_11_1","volume-title":"International conference on machine learning. 173\u2013182","author":"Amodei Dario","year":"2016","unstructured":"Dario Amodei , Sundaram Ananthanarayanan , Rishita Anubhai , Jingliang Bai , Eric Battenberg , Carl Case , Jared Casper , Bryan Catanzaro , Qiang Cheng , and Guoliang Chen . 2016 . Deep speech 2: End-to-end speech recognition in english and mandarin . In International conference on machine learning. 173\u2013182 . https:\/\/doi.org\/10.5555\/3045390.3045410 Dario Amodei, Sundaram Ananthanarayanan, Rishita Anubhai, Jingliang Bai, Eric Battenberg, Carl Case, Jared Casper, Bryan Catanzaro, Qiang Cheng, and Guoliang Chen. 2016. Deep speech 2: End-to-end speech recognition in english and mandarin. In International conference on machine learning. 173\u2013182. https:\/\/doi.org\/10.5555\/3045390.3045410"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628092"},{"key":"e_1_3_2_1_13_1","volume-title":"MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. CoRR, abs\/1512.01274","author":"Chen Tianqi","year":"2015","unstructured":"Tianqi Chen , Mu Li , Yutian Li , Min Lin , Naiyan Wang , Minjie Wang , Tianjun Xiao , Bing Xu , Chiyuan Zhang , and Zheng Zhang . 2015. MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. CoRR, abs\/1512.01274 ( 2015 ), arxiv:1512.01274. arxiv:1512.01274 Tianqi Chen, Mu Li, Yutian Li, Min Lin, Naiyan Wang, Minjie Wang, Tianjun Xiao, Bing Xu, Chiyuan Zhang, and Zheng Zhang. 2015. MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. CoRR, abs\/1512.01274 (2015), arxiv:1512.01274. arxiv:1512.01274"},{"key":"e_1_3_2_1_14_1","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen , Thierry Moreau , Ziheng Jiang , Lianmin Zheng , Eddie Yan , Haichen Shen , Meghan Cowan , Leyuan Wang , Yuwei Hu , and Luis Ceze . 2018 . TVM: An automated end-to-end optimizing compiler for deep learning . In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18) . 578\u2013594. https:\/\/doi.org\/10.5555\/3291168.3291211 Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, and Luis Ceze. 2018. TVM: An automated end-to-end optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 578\u2013594. https:\/\/doi.org\/10.5555\/3291168.3291211"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00353"},{"key":"e_1_3_2_1_16_1","volume-title":"Professional Cuda C Programming","author":"Cheng John","unstructured":"John Cheng , Max Grossman , and Ty McKercher . 2014. Professional Cuda C Programming . John Wiley & Sons . https:\/\/doi.org\/10.5555\/2935593 John Cheng, Max Grossman, and Ty McKercher. 2014. Professional Cuda C Programming. John Wiley & Sons. https:\/\/doi.org\/10.5555\/2935593"},{"key":"e_1_3_2_1_17_1","volume-title":"cuDNN: Efficient Primitives for Deep Learning. CoRR, abs\/1410.0759","author":"Chetlur Sharan","year":"2014","unstructured":"Sharan Chetlur , Cliff Woolley , Philippe Vandermersch , Jonathan Cohen , John Tran , Bryan Catanzaro , and Evan Shelhamer . 2014. cuDNN: Efficient Primitives for Deep Learning. CoRR, abs\/1410.0759 ( 2014 ), arxiv:1410.0759. arxiv:1410.0759 Sharan Chetlur, Cliff Woolley, Philippe Vandermersch, Jonathan Cohen, John Tran, Bryan Catanzaro, and Evan Shelhamer. 2014. cuDNN: Efficient Primitives for Deep Learning. CoRR, abs\/1410.0759 (2014), arxiv:1410.0759. arxiv:1410.0759"},{"key":"e_1_3_2_1_18_1","unstructured":"Soumith Chintala. ImageNet training in PyTorch. https:\/\/github.com\/pytorch\/examples\/tree\/master\/imagenet Accessed: 2019-11-21.  Soumith Chintala. ImageNet training in PyTorch. https:\/\/github.com\/pytorch\/examples\/tree\/master\/imagenet Accessed: 2019-11-21."},{"key":"e_1_3_2_1_19_1","volume-title":"Yixing Lao, Christopher R. Lishka, Jaikrishnan Menon, Jennifer Myers, Sandeep Aswath Narayana, Adam Procter, and Tristan J. Webb.","author":"Cyphers Scott","year":"2018","unstructured":"Scott Cyphers , Arjun K. Bansal , Anahita Bhiwandiwalla , Jayaram Bobba , Matthew Brookhart , Avijit Chakraborty , William Constable , Christian Convey , Leona Cook , Omar Kanawi , Robert Kimball , Jason Knight , Nikolay Korovaiko , Varun Kumar Vijay , Yixing Lao, Christopher R. Lishka, Jaikrishnan Menon, Jennifer Myers, Sandeep Aswath Narayana, Adam Procter, and Tristan J. Webb. 2018 . Intel nGraph: An Intermediate Representation, Compiler, and Executor for Deep Learning. CoRR , abs\/1801.08058 (2018), arxiv:1801.08058. arxiv:1801.08058 Scott Cyphers, Arjun K. Bansal, Anahita Bhiwandiwalla, Jayaram Bobba, Matthew Brookhart, Avijit Chakraborty, William Constable, Christian Convey, Leona Cook, Omar Kanawi, Robert Kimball, Jason Knight, Nikolay Korovaiko, Varun Kumar Vijay, Yixing Lao, Christopher R. Lishka, Jaikrishnan Menon, Jennifer Myers, Sandeep Aswath Narayana, Adam Procter, and Tristan J. Webb. 2018. Intel nGraph: An Intermediate Representation, Compiler, and Executor for Deep Learning. CoRR, abs\/1801.08058 (2018), arxiv:1801.08058. arxiv:1801.08058"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_21_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR, abs\/1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2018 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR, abs\/1810.04805 (2018), arxiv:1810.04805. arxiv:1810.04805 Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR, abs\/1810.04805 (2018), arxiv:1810.04805. arxiv:1810.04805"},{"key":"e_1_3_2_1_22_1","unstructured":"Facebook. An implementation of a deep learning recommendation model (DLRM). https:\/\/github.com\/facebookresearch\/dlrm Accessed: 2019-11-21.  Facebook. An implementation of a deep learning recommendation model (DLRM). https:\/\/github.com\/facebookresearch\/dlrm Accessed: 2019-11-21."},{"key":"e_1_3_2_1_23_1","volume-title":"Deep Residual Learning for Image Recognition. CoRR, abs\/1512.03385","author":"He Kaiming","year":"2015","unstructured":"Kaiming He , Xiangyu Zhang , Shaoqing Ren , and Jian Sun . 2015. Deep Residual Learning for Image Recognition. CoRR, abs\/1512.03385 ( 2015 ), arxiv:1512.03385. arxiv:1512.03385 Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. CoRR, abs\/1512.03385 (2015), arxiv:1512.03385. arxiv:1512.03385"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_2_1_27_1","unstructured":"Wonkyung Jung Daejin Jung Sunjung Lee Wonjong Rhee and Jung Ho Ahn. 2018. Restructuring batch normalization to accelerate CNN training. arXiv preprint arXiv:1807.01702.  Wonkyung Jung Daejin Jung Sunjung Lee Wonjong Rhee and Jung Ho Ahn. 2018. Restructuring batch normalization to accelerate CNN training. arXiv preprint arXiv:1807.01702."},{"key":"e_1_3_2_1_28_1","volume-title":"XLA: TensorFlow, compiled. https:\/\/www.youtube.com\/watch?v=kAOanJczHA0 TensorFlow Dev Summit, Accessed: 2021-05-10.","author":"Leary Chris","year":"2017","unstructured":"Chris Leary and Todd Wang . 2017 . XLA: TensorFlow, compiled. https:\/\/www.youtube.com\/watch?v=kAOanJczHA0 TensorFlow Dev Summit, Accessed: 2021-05-10. Chris Leary and Todd Wang. 2017. XLA: TensorFlow, compiled. https:\/\/www.youtube.com\/watch?v=kAOanJczHA0 TensorFlow Dev Summit, Accessed: 2021-05-10."},{"key":"e_1_3_2_1_29_1","volume-title":"Automatic Horizontal Fusion for GPU Kernels. CoRR, abs\/2007.01277","author":"Li Ao","year":"2020","unstructured":"Ao Li , Bojian Zheng , Gennady Pekhimenko , and Fan Long . 2020. Automatic Horizontal Fusion for GPU Kernels. CoRR, abs\/2007.01277 ( 2020 ), arxiv:2007.01277. arxiv:2007.01277 Ao Li, Bojian Zheng, Gennady Pekhimenko, and Fan Long. 2020. Automatic Horizontal Fusion for GPU Kernels. CoRR, abs\/2007.01277 (2020), arxiv:2007.01277. arxiv:2007.01277"},{"key":"e_1_3_2_1_30_1","volume-title":"Manning","author":"Luong Minh-Thang","year":"2015","unstructured":"Minh-Thang Luong , Hieu Pham , and Christopher D . Manning . 2015 . Effective Approaches to Attention-based Neural Machine Translation. CoRR , abs\/1508.04025 (2015), arxiv:1508.04025. arxiv:1508.04025 Minh-Thang Luong, Hieu Pham, and Christopher D. Manning. 2015. Effective Approaches to Attention-based Neural Machine Translation. CoRR, abs\/1508.04025 (2015), arxiv:1508.04025. arxiv:1508.04025"},{"key":"e_1_3_2_1_31_1","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. CoRR abs\/1906.00091 (2019) arxiv:1906.00091. arxiv:1906.00091  Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. CoRR abs\/1906.00091 (2019) arxiv:1906.00091. arxiv:1906.00091"},{"key":"e_1_3_2_1_32_1","unstructured":"NVIDIA. NVIDIA TensorRT. https:\/\/developer.nvidia.com\/tensorrt Accessed: 2020-05-11.  NVIDIA. NVIDIA TensorRT. https:\/\/developer.nvidia.com\/tensorrt Accessed: 2020-05-11."},{"key":"e_1_3_2_1_33_1","unstructured":"NVIDIA. NVIDIA visual profiler. https:\/\/developer.nvidia.com\/nvidia-visual-profiler  NVIDIA. NVIDIA visual profiler. https:\/\/developer.nvidia.com\/nvidia-visual-profiler"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"e_1_3_2_1_35_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","year":"1912","unstructured":"Adam Paszke , Sam Gross , Francisco Massa , Adam Lerer , James Bradbury , Gregory Chanan , Trevor Killeen , Zeming Lin , Natalia Gimelshein , Luca Antiga , Alban Desmaison , Andreas K\u00f6pf , Edward Yang , Zach DeVito , Martin Raison , Alykhan Tejani , Sasank Chilamkurthy , Benoit Steiner , Lu Fang , Junjie Bai , and Soumith Chintala . 2019. PyTorch: An Imperative Style , High-Performance Deep Learning Library . CoRR, abs\/ 1912 .01703 (2019), arxiv:1912.01703. arxiv:1912.01703 Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. CoRR, abs\/1912.01703 (2019), arxiv:1912.01703. arxiv:1912.01703"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330377"},{"key":"e_1_3_2_1_37_1","volume-title":"100, 000+ Questions for Machine Comprehension of Text. CoRR, abs\/1606.05250","author":"Rajpurkar Pranav","year":"2016","unstructured":"Pranav Rajpurkar , Jian Zhang , Konstantin Lopyrev , and Percy Liang . 2016. SQuAD : 100, 000+ Questions for Machine Comprehension of Text. CoRR, abs\/1606.05250 ( 2016 ), arxiv:1606.05250. arxiv:1606.05250 Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, and Percy Liang. 2016. SQuAD: 100, 000+ Questions for Machine Comprehension of Text. CoRR, abs\/1606.05250 (2016), arxiv:1606.05250. arxiv:1606.05250"},{"key":"e_1_3_2_1_38_1","volume-title":"Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation. CoRR, abs\/1801.04381","author":"Sandler Mark","year":"2018","unstructured":"Mark Sandler , Andrew G. Howard , Menglong Zhu , Andrey Zhmoginov , and Liang-Chieh Chen . 2018. Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation. CoRR, abs\/1801.04381 ( 2018 ), arxiv:1801.04381. arxiv:1801.04381 Mark Sandler, Andrew G. Howard, Menglong Zhu, Andrey Zhmoginov, and Liang-Chieh Chen. 2018. Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation. CoRR, abs\/1801.04381 (2018), arxiv:1801.04381. arxiv:1801.04381"},{"key":"e_1_3_2_1_39_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. abs\/1409.1556 arxiv:1409.1556. arxiv:1409.1556  Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. abs\/1409.1556 arxiv:1409.1556. arxiv:1409.1556"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2018.08.004"},{"key":"e_1_3_2_1_42_1","volume-title":"Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. CoRR, abs\/1802.04730","author":"Vasilache Nicolas","year":"2018","unstructured":"Nicolas Vasilache , Oleksandr Zinenko , Theodoros Theodoridis , Priya Goyal , Zachary DeVito , William S. Moses , Sven Verdoolaege , Andrew Adams , and Albert Cohen . 2018 . Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. CoRR, abs\/1802.04730 (2018), arxiv:1802.04730. arxiv:1802.04730 Nicolas Vasilache, Oleksandr Zinenko, Theodoros Theodoridis, Priya Goyal, Zachary DeVito, William S. Moses, Sven Verdoolaege, Andrew Adams, and Albert Cohen. 2018. Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. CoRR, abs\/1802.04730 (2018), arxiv:1802.04730. arxiv:1802.04730"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-06486-4_7"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.1998.10004"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00951"},{"key":"e_1_3_2_1_47_1","unstructured":"Lianmin Zheng and Eddie Yan. Auto-tuning a convolutional network for NVIDIA GPU. https:\/\/docs.tvm.ai\/tutorials\/autotvm\/tune_relay_cuda.html Accessed: 2019-11-21.  Lianmin Zheng and Eddie Yan. Auto-tuning a convolutional network for NVIDIA GPU. https:\/\/docs.tvm.ai\/tutorials\/autotvm\/tune_relay_cuda.html Accessed: 2019-11-21."}],"event":{"name":"PLDI '21: 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation","location":"Virtual Canada","acronym":"PLDI '21","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages"]},"container-title":["Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3453483.3454038","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3453483.3454038","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:03:07Z","timestamp":1750197787000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3453483.3454038"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,18]]},"references-count":47,"alternative-id":["10.1145\/3453483.3454038","10.1145\/3453483"],"URL":"https:\/\/doi.org\/10.1145\/3453483.3454038","relation":{},"subject":[],"published":{"date-parts":[[2021,6,18]]},"assertion":[{"value":"2021-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}