{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,25]],"date-time":"2025-07-25T10:10:34Z","timestamp":1753438234514,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2023YFB3308702"],"award-info":[{"award-number":["2023YFB3308702"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62302489"],"award-info":[{"award-number":["No.62302489"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3625549.3658658","type":"proceedings-article","created":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T15:55:29Z","timestamp":1725033329000},"page":"56-68","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["ETS: Deep Learning Training Iteration Time Prediction based on Execution Trace Sliding Window"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0699-394X","authenticated-orcid":false,"given":"Zichao","family":"Yang","sequence":"first","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2514-2638","authenticated-orcid":false,"given":"Hao","family":"Guo","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7903-5879","authenticated-orcid":false,"given":"Heng","family":"Wu","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Sciences, Beijing, China"},{"name":"University of Chinese Academy of Sciences, Nanjing, China"},{"name":"Nanjing Institute of Software Technology, Nanjing, China"},{"name":"Key Laboratory of System Software (Chinese Academy of Sciences) and State Key Laboratory of Computer Science, Institute of Software, Chinese Academy of Sciences., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1323-2455","authenticated-orcid":false,"given":"Yuewen","family":"Wu","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3650-1475","authenticated-orcid":false,"given":"Hua","family":"Zhong","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0237-5100","authenticated-orcid":false,"given":"Wenbo","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Sciences, Beijing, China"},{"name":"University of Chinese Academy of Sciences, Nanjing, China"},{"name":"Nanjing Institute of Software Technology, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9193-281X","authenticated-orcid":false,"given":"Chuan","family":"Zhou","sequence":"additional","affiliation":[{"name":"Minzu University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9806-1243","authenticated-orcid":false,"given":"Yan","family":"Liu","sequence":"additional","affiliation":[{"name":"Inspur Software Co., Ltd., Jinan, China"}]}],"member":"320","published-online":{"date-parts":[[2024,8,30]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2020. Paddle Quantum. https:\/\/github.com\/PaddlePaddle\/Quantum"},{"key":"e_1_3_2_1_2_1","unstructured":"2022. BERT. https:\/\/github.com\/google-research\/bert"},{"key":"e_1_3_2_1_3_1","unstructured":"2024. NVIDIA Nsight Systems. https:\/\/developer.nvidia.com\/nsight-systems"},{"key":"e_1_3_2_1_4_1","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org."},{"key":"e_1_3_2_1_5_1","volume-title":"Dnnabacus: Toward accurate computational cost prediction for deep neural networks. arXiv preprint arXiv:2205.12095","author":"Bai Lu","year":"2022","unstructured":"Lu Bai, Weixing Ji, Qinyuan Li, Xilai Yao, Wei Xin, and Wanyi Zhu. 2022. Dnnabacus: Toward accurate computational cost prediction for deep neural networks. arXiv preprint arXiv:2205.12095 (2022)."},{"key":"e_1_3_2_1_6_1","volume-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555","author":"Chung Junyoung","year":"2014","unstructured":"Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)."},{"key":"e_1_3_2_1_7_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_9_1","first-page":"10480","article-title":"Brp-nas: Prediction-based nas using gcns","volume":"33","author":"Dudziak Lukasz","year":"2020","unstructured":"Lukasz Dudziak, Thomas Chau, Mohamed Abdelfattah, Royson Lee, Hyeji Kim, and Nicholas Lane. 2020. Brp-nas: Prediction-based nas using gcns. Advances in Neural Information Processing Systems 33 (2020), 10480--10490.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486978"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP58684.2023.00039"},{"key":"e_1_3_2_1_12_1","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Geoffrey X Yu","year":"2021","unstructured":"X Yu Geoffrey, Yubo Gao, Pavel Golikov, and Gennady Pekhimenko. 2021. Habitat: A {Runtime-Based} computational performance predictor for deep neural network training. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). 503--521."},{"key":"e_1_3_2_1_13_1","volume-title":"16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Gu Juncheng","year":"2019","unstructured":"Juncheng Gu, Mosharaf Chowdhury, Kang G Shin, Yibo Zhu, Myeongjae Jeon, Junjie Qian, Hongqiang Liu, and Chuanxiong Guo. 2019. Tiresias: A {GPU} cluster manager for distributed deep learning. In 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19). 485--500."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2013.6693089"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00113"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106622"},{"key":"e_1_3_2_1_19_1","volume-title":"Long short-term memory. Neural computation 9, 8","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation 9, 8 (1997), 1735--1780."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476223"},{"key":"e_1_3_2_1_21_1","volume-title":"Scalable and Interpretable Scheduler for Deep Learning Training Jobs. In Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","volume":"2","author":"Hu Qinghao","year":"2023","unstructured":"Qinghao Hu, Meng Zhang, Peng Sun, Yonggang Wen, and Tianwei Zhang. 2023. Lucid: A Non-intrusive, Scalable and Interpretable Scheduler for Deep Learning Training Jobs. In Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2. 457--472."},{"key":"e_1_3_2_1_22_1","volume-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and< 0.5 MB model size. arXiv preprint arXiv:1602.07360","author":"Iandola Forrest N","year":"2016","unstructured":"Forrest N Iandola, Song Han, Matthew W Moskewicz, Khalid Ashraf, William J Dally, and Kurt Keutzer. 2016. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and< 0.5 MB model size. arXiv preprint arXiv:1602.07360 (2016)."},{"key":"e_1_3_2_1_23_1","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Jeon Myeongjae","year":"2019","unstructured":"Myeongjae Jeon, Shivaram Venkataraman, Amar Phanishayee, Junjie Qian, Wencong Xiao, and Fan Yang. 2019. Analysis of {Large-Scale}{Multi-Tenant}{GPU} Clusters for {DNN} Training Workloads. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). 947--960."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_2_1_25_1","volume-title":"2009 International Conference on High Performance Computing (HiPC). IEEE, 463--472","author":"Kothapalli Kishore","year":"2009","unstructured":"Kishore Kothapalli, Rishabh Mukherjee, M Suhail Rehman, Suryakant Patidar, PJ Narayanan, and Kannan Srinathan. 2009. A performance prediction model for the CUDA GPGPU platform. In 2009 International Conference on High Performance Computing (HiPC). IEEE, 463--472."},{"key":"e_1_3_2_1_26_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387547"},{"key":"e_1_3_2_1_28_1","volume-title":"2021 30th International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE, 173--185","author":"Liu Guodong","year":"2021","unstructured":"Guodong Liu, Sa Wang, and Yungang Bao. 2021. SEER: A Time Prediction Model for CNNs from GPU Kernel's View. In 2021 30th International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE, 173--185."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"e_1_3_2_1_31_1","unstructured":"Paulius Micikevicius Sharan Narang Jonah Alben Gregory Diamos Erich Elsen David Garcia Boris Ginsburg Michael Houston Oleksii Kuchaiev Ganesh Venkatesh et al. 2017. Mixed precision training. arXiv preprint arXiv:1710.03740 (2017)."},{"volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","key":"e_1_3_2_1_32_1","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32. Curran Associates, Inc., 8024--8035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2916550"},{"key":"e_1_3_2_1_34_1","volume-title":"International Conference on Learning Representations.","author":"Qi Hang","year":"2016","unstructured":"Hang Qi, Evan R Sparks, and Ameet Talwalkar. 2016. Paleo: A performance model for deep neural networks. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"David E Rumelhart Geoffrey E Hinton Ronald J Williams et al. 1985. Learning internal representations by error propagation.","DOI":"10.21236\/ADA164453"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2015.45"},{"key":"e_1_3_2_1_38_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"key":"e_1_3_2_1_42_1","volume-title":"International conference on machine learning. PMLR, 6105--6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105--6114."},{"key":"e_1_3_2_1_43_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_44_1","volume-title":"Proceedings of the International Conference on Research in Adaptive and Convergent Systems. 90--95","author":"Wang Chuan-Chi","year":"2020","unstructured":"Chuan-Chi Wang, Ying-Chiao Liao, Ming-Chang Kao, Wen-Yew Liang, and Shih-Hao Hung. 2020. PerfNet: Platform-aware performance modeling for deep neural networks. In Proceedings of the International Conference on Research in Adaptive and Convergent Systems. 90--95."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477133.3477137"},{"key":"e_1_3_2_1_46_1","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. {MLaaS} in the Wild: Workload Analysis and Scheduling in {Large-Scale} Heterogeneous {GPU} Clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). 945--960."},{"key":"e_1_3_2_1_47_1","volume-title":"Proceedings of the 13th Symposium on Cloud Computing. 461--476","author":"Wu Yuewen","year":"2022","unstructured":"Yuewen Wu, Heng Wu, Diaohan Luo, Yuanjia Xu, Yi Hu, Wenbo Zhang, and Hua Zhong. 2022. Serving unseen deep learning models with near-optimal configurations: a fast adaptive search approach. In Proceedings of the 13th Symposium on Cloud Computing. 461--476."},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision. 1284--1293","author":"Xie Saining","year":"2019","unstructured":"Saining Xie, Alexander Kirillov, Ross Girshick, and Kaiming He. 2019. Exploring randomly wired neural networks for image recognition. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 1284--1293."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3530895","article-title":"Prediction of the resource consumption of distributed deep learning systems","volume":"6","author":"Yang Gyeongsik","year":"2022","unstructured":"Gyeongsik Yang, Changyong Shin, Jeunghwan Lee, Yeonho Yoo, and Chuck Yoo. 2022. Prediction of the resource consumption of distributed deep learning systems. Proceedings of the ACM on Measurement and Analysis of Computing Systems 6, 2 (2022), 1--25.","journal-title":"Proceedings of the ACM on Measurement and Analysis of Computing Systems"},{"key":"e_1_3_2_1_50_1","volume-title":"Hydra: Deadline-aware and Efficiency-oriented Scheduling for Deep Learning Jobs on Heterogeneous GPUs","author":"Yang Zichao","year":"2023","unstructured":"Zichao Yang, Heng Wu, Yuanjia Xu, Yuewen Wu, Hua Zhong, and Wenbo Zhang. 2023. Hydra: Deadline-aware and Efficiency-oriented Scheduling for Deep Learning Jobs on Heterogeneous GPUs. IEEE Trans. Comput. (2023)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3079202"},{"key":"e_1_3_2_1_52_1","volume-title":"Wide residual networks. arXiv preprint arXiv:1605.07146","author":"Zagoruyko Sergey","year":"2016","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2016. Wide residual networks. arXiv preprint arXiv:1605.07146 (2016)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2015.15"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.5555\/2014698.2014875"},{"key":"e_1_3_2_1_56_1","volume-title":"2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Zhu Hongyu","year":"2020","unstructured":"Hongyu Zhu, Amar Phanishayee, and Gennady Pekhimenko. 2020. Daydream: Accurately estimating the efficacy of optimizations for {DNN} training. In 2020 USENIX Annual Technical Conference (USENIX ATC 20). 337--352."}],"event":{"name":"HPDC '24: 33rd International Symposium on High-Performance Parallel and Distributed Computing","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"],"location":"Pisa Italy","acronym":"HPDC '24"},"container-title":["Proceedings of the 33rd International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3625549.3658658","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3625549.3658658","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:50:38Z","timestamp":1750287038000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3625549.3658658"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":56,"alternative-id":["10.1145\/3625549.3658658","10.1145\/3625549"],"URL":"https:\/\/doi.org\/10.1145\/3625549.3658658","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-08-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}