{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T22:27:40Z","timestamp":1768516060870,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":74,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,22]]},"DOI":"10.1145\/3627703.3629572","type":"proceedings-article","created":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T06:28:28Z","timestamp":1713421708000},"page":"1054-1074","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["CDMPP: A Device-Model Agnostic Framework for Latency Prediction of Tensor Programs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-5787-5226","authenticated-orcid":false,"given":"Hanpeng","family":"Hu","sequence":"first","affiliation":[{"name":"University of Hong Kong, Hong Kong and ByteDance Inc"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0537-4004","authenticated-orcid":false,"given":"Junwei","family":"Su","sequence":"additional","affiliation":[{"name":"University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3376-0607","authenticated-orcid":false,"given":"Juntao","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3989-4358","authenticated-orcid":false,"given":"Yanghua","family":"Peng","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Seattle, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9113-2660","authenticated-orcid":false,"given":"Yibo","family":"Zhu","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Seattle, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4879-5335","authenticated-orcid":false,"given":"Haibin","family":"Lin","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Mountain View, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3144-4398","authenticated-orcid":false,"given":"Chuan","family":"Wu","sequence":"additional","affiliation":[{"name":"University of Hong Kong, Hong Kong"}]}],"member":"320","published-online":{"date-parts":[[2024,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Tensorflow: A System for Large-scale Machine Learning. In Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, et al. Tensorflow: A System for Large-scale Machine Learning. In Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation, 2016."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3322967"},{"key":"e_1_3_2_1_3_1","volume-title":"Masanori Koyama. Optuna: A Next-generation Hyperparameter Optimization Framework. In Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"Akiba Takuya","year":"2019","unstructured":"Takuya Akiba, Shotaro Sano, Toshihiko Yanase, Takeru Ohta, and Masanori Koyama. Optuna: A Next-generation Hyperparameter Optimization Framework. In Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2019."},{"key":"e_1_3_2_1_4_1","unstructured":"AMD. AMD EPYC 2019. https:\/\/www.amd.com\/en\/products\/cpu\/amd-epyc-7452."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of Machine Learning and Systems","author":"Baghdadi Riyadh","year":"2021","unstructured":"Riyadh Baghdadi, Massinissa Merouani, Mohamed-Hicham Leghettas, Kamel Abdous, Taha Arbaoui, Karima Benatchba, et al. A deep learning based cost model for automatic code optimization. In Proceedings of Machine Learning and Systems, 2021."},{"key":"e_1_3_2_1_6_1","volume-title":"Automated optimized parameters for T-distributed stochastic neighbor embedding improve visualization and analysis of large datasets. Nature communications","author":"Belkina Anna C","year":"2019","unstructured":"Anna C Belkina, Christopher O Ciccolella, Rina Anno, Richard Halpert, Josef Spidlen, and Jennifer E Snyder-Cappione. Automated optimized parameters for T-distributed stochastic neighbor embedding improve visualization and analysis of large datasets. Nature communications, 2019."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.2517-6161.1964.tb00553.x"},{"key":"e_1_3_2_1_8_1","volume-title":"ECML PKDD Workshop: Languages for Data Mining and Machine Learning","author":"Buitinck Lars","year":"2013","unstructured":"Lars Buitinck, Gilles Louppe, Mathieu Blondel, Fabian Pedregosa, Andreas Mueller, Olivier Grisel, Vlad Niculae, Peter Prettenhofer, Alexandre Gramfort, Jaques Grobler, Robert Layton, Jake VanderPlas, Arnaud Joly, Brian Holt, and Ga\u00ebl Varoquaux. API design for machine learning software: experiences from the scikit-learn project. In ECML PKDD Workshop: Languages for Data Mining and Machine Learning, 2013."},{"key":"e_1_3_2_1_9_1","volume-title":"Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint arXiv:1812.00332","author":"Cai Han","year":"2018","unstructured":"Han Cai, Ligeng Zhu, and Song Han. Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint arXiv:1812.00332, 2018."},{"key":"e_1_3_2_1_10_1","volume-title":"Xgboost: extreme gradient boosting. R package version 0.4-2","author":"Chen Tianqi","year":"2015","unstructured":"Tianqi Chen, Tong He, Michael Benesty, Vadim Khotilovich, Yuan Tang, Hyunsu Cho, Kailong Chen, et al. Xgboost: extreme gradient boosting. R package version 0.4-2, 2015."},{"key":"e_1_3_2_1_11_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et al. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation, 2018."},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Lianmin Zheng, Eddie Yan, Ziheng Jiang, Thierry Moreau, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. Learning to optimize tensor programs. In Proceedings of Advances in Neural Information Processing Systems, 2018."},{"key":"e_1_3_2_1_13_1","first-page":"K80","year":"2014","unstructured":"NVIDIA Corporation. NVIDIA K80, 2014. https:\/\/www.nvidia.com\/en-gb\/data-center\/tesla-k80\/.","journal-title":"NVIDIA Corporation. NVIDIA"},{"key":"e_1_3_2_1_14_1","first-page":"P100","year":"2016","unstructured":"NVIDIA Corporation. NVIDIA P100, 2016. https:\/\/www.nvidia.com\/en-sg\/data-center\/tesla-p100\/.","journal-title":"NVIDIA Corporation. NVIDIA"},{"key":"e_1_3_2_1_15_1","first-page":"V100","year":"2017","unstructured":"NVIDIA Corporation. NVIDIA V100, 2017. https:\/\/www.nvidia.com\/en-gb\/data-center\/tesla-v100\/.","journal-title":"NVIDIA Corporation. NVIDIA"},{"key":"e_1_3_2_1_16_1","volume-title":"Corporation. NVIDIA 2080Ti, 2018","author":"NVIDIA","year":"2080","unstructured":"NVIDIA Corporation. NVIDIA 2080Ti, 2018. https:\/\/www.nvidia.cn\/geforce\/graphics-cards\/rtx-2080-ti\/."},{"key":"e_1_3_2_1_17_1","first-page":"T4","year":"2018","unstructured":"NVIDIA Corporation. NVIDIA T4, 2018. https:\/\/www.nvidia.com\/en-in\/data-center\/tesla-t4\/.","journal-title":"NVIDIA Corporation. NVIDIA"},{"key":"e_1_3_2_1_18_1","first-page":"A100","year":"2020","unstructured":"NVIDIA Corporation. NVIDIA A100, 2020. https:\/\/www.nvidia.com\/en-sg\/data-center\/a100\/.","journal-title":"NVIDIA Corporation. NVIDIA"},{"key":"e_1_3_2_1_19_1","volume-title":"NVIDIA Data Center Deep Learning Product Performance","author":"NVIDIA Corporation","year":"2020","unstructured":"NVIDIA Corporation. NVIDIA Data Center Deep Learning Product Performance, 2020. https:\/\/developer.nvidia.com\/deep-learning-performance-training-inference."},{"key":"e_1_3_2_1_20_1","volume-title":"Transformer-xl: Attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860","author":"Dai Zihang","year":"2019","unstructured":"Zihang Dai, Zhilin Yang, Yiming Yang, Jaime Carbonell, Quoc V Le, and Ruslan Salakhutdinov. Transformer-xl: Attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860, 2019."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/2028916"},{"key":"e_1_3_2_1_22_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Dudziak Lukasz","year":"2020","unstructured":"Lukasz Dudziak, Thomas Chau, Mohamed Abdelfattah, Royson Lee, Hyeji Kim, and Nicholas Lane. BRP-NAS: Prediction-based NAS using GCNS. In Proceedings of Advances in Neural Information Processing Systems, 2020."},{"key":"e_1_3_2_1_24_1","volume-title":"AWS Graviton Processor","author":"Amazon","year":"2019","unstructured":"Amazon EC2. AWS Graviton Processor, 2019. https:\/\/aws.amazon.com\/cn\/ec2\/graviton\/."},{"key":"e_1_3_2_1_25_1","volume-title":"Gennady Pekhimenko. Habitat: A Runtime-Based Computational Performance Predictor for Deep Neural Network Training. In Proceedings of the 2021 USENIX Annual Technical Conference","author":"Geoffrey X Yu","year":"2021","unstructured":"X Yu Geoffrey, Yubo Gao, Pavel Golikov, and Gennady Pekhimenko. Habitat: A Runtime-Based Computational Performance Predictor for Deep Neural Network Training. In Proceedings of the 2021 USENIX Annual Technical Conference, 2021."},{"key":"e_1_3_2_1_26_1","volume-title":"Tensor Processing Unit","year":"2016","unstructured":"Google. Tensor Processing Unit, 2016. https:\/\/cloud.google.com\/tpu\/docs\/tpus."},{"key":"e_1_3_2_1_27_1","volume-title":"Jian Sun. Deep Residual Learning for Image Recognition. In Proceedings of the IEEE conference on Computer Vision and Pattern Recognition","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. Deep Residual Learning for Image Recognition. In Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, 2016."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of International Conference on Machine Learning","author":"Hsieh Kevin","year":"2020","unstructured":"Kevin Hsieh, Amar Phanishayee, Onur Mutlu, and Phillip Gibbons. The non-iid data quagmire of decentralized machine learning. In Proceedings of International Conference on Machine Learning, 2020."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of Machine Learning and Systems","author":"Hu Hanpeng","year":"2022","unstructured":"Hanpeng Hu, Chenyu Jiang, Yuchen Zhong, Yanghua Peng, Chuan Wu, Yibo Zhu, Haibin Lin, and Chuanxiong Guo. dPRO: A Generic Profiling and Optimization System for Expediting Distributed DNN Training. In Proceedings of Machine Learning and Systems, 2022."},{"key":"e_1_3_2_1_30_1","unstructured":"HUAWEI. HUAWEI Ascend 2018. https:\/\/e.huawei.com\/cn\/products\/servers\/ascend."},{"key":"e_1_3_2_1_31_1","first-page":"v4","volume":"5","year":"2016","unstructured":"Intel. Intel\u00ae Xeon\u00ae Processor E5-2673 v4, 2016. https:\/\/ark.intel.com\/content\/www\/us\/en\/ark\/products\/series\/91287\/intel-xeon-processor-e5-v4-family.html.","journal-title":"Intel. Intel\u00ae Xeon\u00ae Processor E"},{"key":"e_1_3_2_1_32_1","unstructured":"Intel. Intel Platinum 2019. https:\/\/www.intel.sg\/content\/www\/xa\/en\/products\/details\/processors\/xeon\/scalable\/platinum.html."},{"key":"e_1_3_2_1_33_1","volume-title":"Alex Aiken. TASO: Optimizing Deep Learning Computation with Automatic Generation of Graph Substitutions. In Proceedings of the 27th ACM Symposium on Operating Systems Principles","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, Oded Padon, James Thomas, Todd Warszawski, Matei Zaharia, and Alex Aiken. TASO: Optimizing Deep Learning Computation with Automatic Generation of Graph Substitutions. In Proceedings of the 27th ACM Symposium on Operating Systems Principles, 2019."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of Machine Learning and Systems","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, James Thomas, Todd Warszawski, Mingyu Gao, Matei Zaharia, and Alex Aiken. Optimizing DNN computation with relaxed graph substitutions. In Proceedings of Machine Learning and Systems, 2019."},{"key":"e_1_3_2_1_35_1","volume-title":"Alex Aiken. Beyond Data and Model Parallelism for Deep Neural Networks. In Proceedings of Machine Learning and Systems","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, Matei Zaharia, and Alex Aiken. Beyond Data and Model Parallelism for Deep Neural Networks. In Proceedings of Machine Learning and Systems, 2019."},{"key":"e_1_3_2_1_36_1","volume-title":"Domain Divergences: a Survey and Empirical Analysis","author":"Kashyap Abhinav Ramesh","year":"2020","unstructured":"Abhinav Ramesh Kashyap, Devamanyu Hazarika, Min-Yen Kan, and Roger Zimmermann. Domain Divergences: a Survey and Empirical Analysis, 2020."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of Machine Learning and Systems","author":"Kaufman Sam","year":"2021","unstructured":"Sam Kaufman, Phitchaya Phothilimthana, Yanqi Zhou, Charith Mendis, Sudip Roy, Amit Sabne, and Mike Burrows. A learned performance model for tensor processing units. In Proceedings of Machine Learning and Systems, 2021."},{"key":"e_1_3_2_1_38_1","volume-title":"Proc. Workshop ML Syst. NeurIPS","author":"Kaufman Samuel","year":"2019","unstructured":"Samuel Kaufman, Phitchaya Mangpo Phothilimthana, and Mike Burrows. Learned TPU cost model for XLA tensor programs. In Proc. Workshop ML Syst. NeurIPS, 2019."},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of International Conference on Machine Learning","author":"Kosaian Jack","year":"2021","unstructured":"Jack Kosaian, Amar Phanishayee, Matthai Philipose, Debadeepta Dey, and Rashmi Vinayak. Boosting the throughput and accelerator utilization of specialized cnn inference beyond increasing batch size. In Proceedings of International Conference on Machine Learning, 2021."},{"key":"e_1_3_2_1_40_1","unstructured":"Habana Labs. Habana Goya 2019. https:\/\/habana.ai\/."},{"key":"e_1_3_2_1_41_1","volume-title":"MLIR: A compiler infrastructure for the end of Moore's law. arXiv preprint arXiv:2002.11054","author":"Lattner Chris","year":"2020","unstructured":"Chris Lattner, Mehdi Amini, Uday Bondhugula, Albert Cohen, Andy Davis, Jacques Pienaar, River Riddle, Tatiana Shpeisman, Nicolas Vasilache, and Oleksandr Zinenko. MLIR: A compiler infrastructure for the end of Moore's law. arXiv preprint arXiv:2002.11054, 2020."},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Lin Ji","year":"2020","unstructured":"Ji Lin, Wei-Ming Chen, Yujun Lin, Chuang Gan, Song Han, et al. Mcunet: Tiny deep learning on iot devices. In Proceedings of Advances in Neural Information Processing Systems, 2020."},{"key":"e_1_3_2_1_43_1","volume-title":"Hailong Yang. NNLQP: A Multi-Platform Neural Network Latency Query and Prediction System with An Evolving Database. In Proceedings of the 51st International Conference on Parallel Processing","author":"Liu Liang","year":"2022","unstructured":"Liang Liu, Mingzhu Shen, Ruihao Gong, Fengwei Yu, and Hailong Yang. NNLQP: A Multi-Platform Neural Network Latency Query and Prediction System with An Evolving Database. In Proceedings of the 51st International Conference on Parallel Processing, 2022."},{"key":"e_1_3_2_1_44_1","volume-title":"Deep Transfer Learning with Joint Adaptation Networks","author":"Long Mingsheng","year":"2016","unstructured":"Mingsheng Long, Han Zhu, Jianmin Wang, and Michael I. Jordan. Deep Transfer Learning with Joint Adaptation Networks, 2016."},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of Machine Learning and Systems","author":"Mattson Peter","year":"2020","unstructured":"Peter Mattson, Christine Cheng, Gregory Diamos, Cody Coleman, Paulius Micikevicius, David Patterson, Hanlin Tang, Gu-Yeon Wei, Peter Bailis, Victor Bittorf, et al. Mlperf training benchmark. In Proceedings of Machine Learning and Systems, 2020."},{"key":"e_1_3_2_1_46_1","volume-title":"Marcelo Daniel Gutierrez Mallea, and Peter J Bentley. Pinet: A permutation invariant graph neural network for graph classification. arXiv preprint arXiv:1905.03046","author":"Meltzer Peter","year":"2019","unstructured":"Peter Meltzer, Marcelo Daniel Gutierrez Mallea, and Peter J Bentley. Pinet: A permutation invariant graph neural network for graph classification. arXiv preprint arXiv:1905.03046, 2019."},{"key":"e_1_3_2_1_47_1","unstructured":"MLPerf. MLPerf 2020. https:\/\/mlcommons.org\/."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2844341"},{"key":"e_1_3_2_1_49_1","volume-title":"CUDA Toolkit Release Notes","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. CUDA Toolkit Release Notes, 2020. https:\/\/docs.nvidia.com\/cuda\/archive\/10.2\/cuda-toolkit-release-notes\/index.html."},{"key":"e_1_3_2_1_50_1","volume-title":"cuDNN Documentation","author":"NVIDIA.","year":"2021","unstructured":"NVIDIA. cuDNN Documentation, 2021. https:\/\/docs.nvidia.com\/deeplearning\/cudnn\/developer-guide\/index.html."},{"key":"e_1_3_2_1_51_1","volume-title":"NIPS-W","author":"Paszke Adam","year":"2017","unstructured":"Adam Paszke, Sam Gross, Soumith Chintala, Gregory Chanan, Edward Yang, Zachary DeVito, Zeming Lin, Alban Desmaison, Luca Antiga, and Adam Lerer. Automatic differentiation in PyTorch. In NIPS-W, 2017."},{"key":"e_1_3_2_1_52_1","volume-title":"High-performance Deep Learning Library. In Proceedings of Advances in Neural Information Processing Systems","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. PyTorch: An Imperative Style, High-performance Deep Learning Library. In Proceedings of Advances in Neural Information Processing Systems, 2019."},{"key":"e_1_3_2_1_53_1","volume-title":"Learning sparse features can lead to overfitting in neural networks. arXiv preprint arXiv:2206.12314","author":"Petrini Leonardo","year":"2022","unstructured":"Leonardo Petrini, Francesco Cagnetta, Eric Vanden-Eijnden, and Matthieu Wyart. Learning sparse features can lead to overfitting in neural networks. arXiv preprint arXiv:2206.12314, 2022."},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of International Conference on Machine Learning","author":"Pham Hieu","year":"2018","unstructured":"Hieu Pham, Melody Guan, Barret Zoph, Quoc Le, and Jeff Dean. Efficient neural architecture search via parameters sharing. In Proceedings of International Conference on Machine Learning, 2018."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"e_1_3_2_1_56_1","volume-title":"A survey on domain adaptation theory: learning bounds and theoretical guarantees. arXiv preprint arXiv:2004.11829","author":"Redko Ievgen","year":"2020","unstructured":"Ievgen Redko, Emilie Morvant, Amaury Habrard, Marc Sebban, and Youn\u00e8s Bennani. A survey on domain adaptation theory: learning bounds and theoretical guarantees. arXiv preprint arXiv:2004.11829, 2020."},{"key":"e_1_3_2_1_57_1","volume-title":"Metatune: Meta-learning based cost model for fast and efficient auto-tuning frameworks. arXiv preprint arXiv:2102.04199","author":"Ryu Jaehun","year":"2021","unstructured":"Jaehun Ryu and Hyojin Sung. Metatune: Meta-learning based cost model for fast and efficient auto-tuning frameworks. arXiv preprint arXiv:2102.04199, 2021."},{"key":"e_1_3_2_1_58_1","volume-title":"Simonyan and Andrew Zisserman. Very Deep Convolutional Networks for Large-Scale Image Recognition. In Proceedings of International Conference on Learning Representations","author":"Karen","year":"2015","unstructured":"Karen Simonyan and Andrew Zisserman. Very Deep Convolutional Networks for Large-Scale Image Recognition. In Proceedings of International Conference on Learning Representations, 2015."},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of Machine Learning and Systems","author":"Steiner Benoit","year":"2021","unstructured":"Benoit Steiner, Chris Cummins, Horace He, and Hugh Leather. Value learning for throughput optimization of deep learning workloads. In Proceedings of Machine Learning and Systems, 2021."},{"key":"e_1_3_2_1_60_1","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Stojanov Petar","year":"2021","unstructured":"Petar Stojanov, Zijian Li, Mingming Gong, Ruichu Cai, Jaime G. Carbonell, and Kun Zhang. Domain Adaptation with Invariant Representation Learning: What Transformations to Learn? In Proceedings of Advances in Neural Information Processing Systems, 2021."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2017.3641648"},{"key":"e_1_3_2_1_62_1","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141 ukasz Kaiser, and Illia Polosukhin. Attention is all you need. In Proceedings of Advances in Neural Information Processing Systems, 2017."},{"key":"e_1_3_2_1_63_1","volume-title":"Proceedings of Asian Conference on Machine Learning","author":"Wang Yidong","year":"2023","unstructured":"Yidong Wang, Bowen Zhang, Wenxin Hou, Zhen Wu, Jindong Wang, and Takahiro Shinozaki. Margin calibration for long-tailed visual recognition. In Proceedings of Asian Conference on Machine Learning, 2023."},{"key":"e_1_3_2_1_64_1","volume-title":"Yeo-Johnson power transformations. Department of Applied Statistics","author":"Weisberg Sanford","year":"2001","unstructured":"Sanford Weisberg. Yeo-Johnson power transformations. Department of Applied Statistics, University of Minnesota. Retrieved June, 2001."},{"key":"e_1_3_2_1_65_1","volume-title":"Communications of the ACM","author":"Williams Samuel","year":"2009","unstructured":"Samuel Williams, Andrew Waterman, and David Patterson. Roofline: an insightful visual performance model for multicore architectures. Communications of the ACM, 2009."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2913833"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2019.01.025"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575737"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3467882"},{"key":"e_1_3_2_1_70_1","volume-title":"Moses: Efficient Exploitation of Cross-device Transferable Features for Tensor Program Optimization. arXiv preprint arXiv:2201.05752","author":"Zhao Zhihe","year":"2022","unstructured":"Zhihe Zhao, Xian Shuai, Yang Bai, Neiwen Ling, Nan Guan, Zhenyu Yan, and Guoliang Xing. Moses: Efficient Exploitation of Cross-device Transferable Features for Tensor Program Optimization. arXiv preprint arXiv:2201.05752, 2022."},{"key":"e_1_3_2_1_71_1","volume-title":"Ansor: Generating High-Performance Tensor Programs for Deep Learning. In Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, et al. Ansor: Generating High-Performance Tensor Programs for Deep Learning. In Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation, 2020."},{"key":"e_1_3_2_1_72_1","volume-title":"Proceedings of the Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track","author":"Zheng Lianmin","year":"2021","unstructured":"Lianmin Zheng, Ruochen Liu, Junru Shao, Tianqi Chen, Joseph E Gonzalez, Ion Stoica, and Ameer Haj Ali. Tenset: A large-scale program performance dataset for learned tensor compilers. In Proceedings of the Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track, 2021."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573476"},{"key":"e_1_3_2_1_74_1","volume-title":"Shift-Robust GNNs: Overcoming the Limitations of Localized Graph Training Data","author":"Zhu Qi","year":"2021","unstructured":"Qi Zhu, Natalia Ponomareva, Jiawei Han, and Bryan Perozzi. Shift-Robust GNNs: Overcoming the Limitations of Localized Graph Training Data, 2021."}],"event":{"name":"EuroSys '24: Nineteenth European Conference on Computer Systems","location":"Athens Greece","acronym":"EuroSys '24","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Nineteenth European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3629572","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627703.3629572","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:07:37Z","timestamp":1755824857000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3629572"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,22]]},"references-count":74,"alternative-id":["10.1145\/3627703.3629572","10.1145\/3627703"],"URL":"https:\/\/doi.org\/10.1145\/3627703.3629572","relation":{},"subject":[],"published":{"date-parts":[[2024,4,22]]},"assertion":[{"value":"2024-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}