{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T00:20:36Z","timestamp":1758846036547,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T00:00:00Z","timestamp":1746403200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,5]]},"DOI":"10.1145\/3676151.3719373","type":"proceedings-article","created":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T00:57:09Z","timestamp":1746233829000},"page":"81-91","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["PreNeT: Leveraging Computational Features to Predict Deep Neural Network Training Time"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-9387-2099","authenticated-orcid":false,"given":"Alireza","family":"Pourali","sequence":"first","affiliation":[{"name":"York University, Toronto, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8871-9256","authenticated-orcid":false,"given":"Arian","family":"Boukani","sequence":"additional","affiliation":[{"name":"York University, Toronto, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5439-8024","authenticated-orcid":false,"given":"Hamzeh","family":"Khazaei","sequence":"additional","affiliation":[{"name":"York University, Toronto, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,5,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_1_2_1","volume-title":"Language models are few-shot learners. arXiv preprint arXiv:2005.14165","author":"Brown Tom B","year":"2020","unstructured":"Tom B Brown. 2020. Language models are few-shot learners. arXiv preprint arXiv:2005.14165 (2020)."},{"key":"e_1_3_2_1_3_1","unstructured":"Cerebras Systems. 2024. Cerebras Wafer Scale Engine (WSE). https:\/\/www.cerebras.net\/wafer-scale-engine\/. Accessed: 2024--10--19."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14--4012"},{"key":"e_1_3_2_1_5_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al. 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research 24, 240 (2023), 1--113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_6_1","first-page":"1735","article-title":"Long short-term memory","volume":"9","author":"Computation Neural","year":"2016","unstructured":"Neural Computation. 2016. Long short-term memory. Neural Comput 9 (2016), 1735--1780.","journal-title":"Neural Comput"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_8_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_9_1","volume-title":"Daiyi Peng, Bogdan Gabrys, and Quoc V Le.","author":"Dong Xuanyi","year":"2020","unstructured":"Xuanyi Dong, Mingxing Tan, Adams Wei Yu, Daiyi Peng, Bogdan Gabrys, and Quoc V Le. 2020. AutoHAS: Efficient hyperparameter and architecture search. arXiv preprint arXiv:2006.03656 (2020)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP58684.2023.00039"},{"key":"e_1_3_2_1_11_1","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Geoffrey X Yu","year":"2021","unstructured":"X Yu Geoffrey, Yubo Gao, Pavel Golikov, and Gennady Pekhimenko. 2021. Habitat: A {Runtime-Based} computational performance predictor for deep neural network training. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). 503--521."},{"key":"e_1_3_2_1_12_1","unstructured":"Graphcore. 2024. Graphcore Intelligence Processing Units (IPUs). https:\/\/www.graphcore.ai\/. Accessed: 2024--10--19."},{"key":"e_1_3_2_1_13_1","volume-title":"Sequence Transduction with Recurrent Neural Networks. In International Conference on Machine Learning (ICML) Workshop on Representation Learning. https:\/\/arxiv.org\/abs\/1211","author":"Graves Alex","year":"2012","unstructured":"Alex Graves. 2012. Sequence Transduction with Recurrent Neural Networks. In International Conference on Machine Learning (ICML) Workshop on Representation Learning. https:\/\/arxiv.org\/abs\/1211.3711"},{"key":"e_1_3_2_1_14_1","volume-title":"Habana Labs: AI Processors for Deep Learning. https:\/\/habana.ai\/. Accessed: 2024--10--19.","author":"Labs Habana","year":"2024","unstructured":"Habana Labs. 2024. Habana Labs: AI Processors for Deep Learning. https:\/\/habana.ai\/. Accessed: 2024--10--19."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_16_1","unstructured":"Andrew G. Howard Menglong Zhu Bo Chen Dmitry Kalenichenko Weijun Wang Tobias Weyand Marco Andreetto and Hartwig Adam. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. In arXiv preprint arXiv:1704.04861. https:\/\/arxiv.org\/abs\/1704.04861"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622396"},{"key":"e_1_3_2_1_19_1","volume-title":"Hinton","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E. Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. In Advances in Neural Information Processing Systems (NeurIPS). 1097--1105. https:\/\/papers.nips.cc\/paper\/2012\/file\/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3629526.3645035"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR). https:\/\/arxiv.org\/abs\/1301","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. In Proceedings of the International Conference on Learning Representations (ICLR). https:\/\/arxiv.org\/abs\/1301.3781"},{"key":"e_1_3_2_1_23_1","unstructured":"NVIDIA Corporation. 2016. NVIDIA Tesla P100 GPU Architecture. NVIDIA Product Page. https:\/\/www.nvidia.com\/en-us\/data-center\/tesla-p100\/."},{"key":"e_1_3_2_1_24_1","unstructured":"NVIDIA Corporation. 2016. NVIDIA Tesla P4 GPU Architecture. NVIDIA Whitepaper. https:\/\/www.nvidia.com\/en-us\/data-center\/tesla-p4\/."},{"key":"e_1_3_2_1_25_1","unstructured":"NVIDIA Corporation. 2017. NVIDIA Tesla V100 GPU Architecture. NVIDIA Product Page. https:\/\/www.nvidia.com\/en-us\/data-center\/v100\/."},{"key":"e_1_3_2_1_26_1","unstructured":"NVIDIA Corporation. 2018. NVIDIA T4 Tensor Core GPU. NVIDIA Product Page. https:\/\/www.nvidia.com\/en-us\/data-center\/tesla-t4\/."},{"key":"e_1_3_2_1_27_1","unstructured":"NVIDIA Corporation. 2021. NVIDIA RTX A4000 Graphics Card. NVIDIA Product Page. https:\/\/www.nvidia.com\/en-us\/design-visualization\/rtx-a4000\/."},{"key":"e_1_3_2_1_28_1","unstructured":"NVIDIA Corporation. 2022. NVIDIA GeForce RTX 4090 Graphics Card. NVIDIA Product Page. https:\/\/www.nvidia.com\/en-us\/geforce\/graphics-cards\/40-series\/rtx-4090\/."},{"key":"e_1_3_2_1_29_1","unstructured":"NVIDIA Corporation. 2023. NVIDIA L4 Tensor Core GPU. NVIDIA Product Page. https:\/\/www.nvidia.com\/en-us\/data-center\/l4\/."},{"key":"e_1_3_2_1_30_1","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et al. 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems 35 (2022) 27730--27744."},{"key":"e_1_3_2_1_31_1","volume-title":"2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Park Jay H","year":"2020","unstructured":"Jay H Park, Gyeongchan Yun, M Yi Chang, Nguyen T Nguyen, Seungmin Lee, Jaesik Choi, Sam H Noh, and Young-ri Choi. 2020. {HetPipe}: Enabling large {DNN} training on (whimpy) heterogeneous {GPU} clusters through integration of pipelined model parallelism and data parallelism. In 2020 USENIX Annual Technical Conference (USENIX ATC 20). 307--321."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2916550"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3052895"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3629526.3645042"},{"key":"e_1_3_2_1_35_1","volume-title":"International Conference on Learning Representations.","author":"Qi Hang","year":"2017","unstructured":"Hang Qi, Evan R Sparks, and Ameet Talwalkar. 2017. Paleo: A performance model for deep neural networks. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_36_1","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21, 140 (2020), 1--67. http:\/\/jmlr.org\/papers\/v21\/20-074.html","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_37_1","volume-title":"On Challenges in Machine Learning Model Management. Data Engineering","author":"Schelter Sebastian","year":"2018","unstructured":"Sebastian Schelter, Felix Biessmann, Tim Januschowski, David Salinas, Stephan Seufert, and Gyuri Szarvas. 2018. On Challenges in Machine Learning Model Management. Data Engineering (2018), 5."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"e_1_3_2_1_39_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. In 3rd International Conference on Learning Representations (ICLR). https:\/\/arxiv.org\/abs\/1409","author":"Simonyan Karen","year":"2015","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. In 3rd International Conference on Learning Representations (ICLR). https:\/\/arxiv.org\/abs\/1409.1556"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3629526.3645045"},{"key":"e_1_3_2_1_41_1","volume-title":"Le","author":"Sutskever Ilya","year":"2014","unstructured":"Ilya Sutskever, Oriol Vinyals, and Quoc V. Le. 2014. Sequence to Sequence Learning with Neural Networks. In Advances in Neural Information Processing Systems (NeurIPS). 3104--3112. https:\/\/arxiv.org\/abs\/1409.3215"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"volume-title":"Proceedings of the 36th International Conference on Machine Learning (ICML). 6105--6114","author":"Tan Mingxing","key":"e_1_3_2_1_43_1","unstructured":"Mingxing Tan and Quoc V. Le. 2019. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. In Proceedings of the 36th International Conference on Machine Learning (ICML). 6105--6114. http:\/\/proceedings.mlr.press\/v97\/tan19a.html"},{"key":"e_1_3_2_1_44_1","volume-title":"Attention is all you need. Advances in Neural Information Processing Systems","author":"Vaswani A","year":"2017","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477133.3477137"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3530895"},{"key":"e_1_3_2_1_47_1","first-page":"6136","article-title":"Predicting training time without training","volume":"33","author":"Zancato Luca","year":"2020","unstructured":"Luca Zancato, Alessandro Achille, Avinash Ravichandran, Rahul Bhotika, and Stefano Soatto. 2020. Predicting training time without training. Advances in Neural Information Processing Systems 33 (2020), 6136--6146.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"ICPE '25: 16th ACM\/SPEC International Conference on Performance Engineering","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","SIGMETRICS ACM Special Interest Group on Measurement and Evaluation"],"location":"Toronto ON Canada","acronym":"ICPE '25"},"container-title":["Proceedings of the 16th ACM\/SPEC International Conference on Performance Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676151.3719373","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676151.3719373","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T16:23:08Z","timestamp":1758817388000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676151.3719373"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,5]]},"references-count":47,"alternative-id":["10.1145\/3676151.3719373","10.1145\/3676151"],"URL":"https:\/\/doi.org\/10.1145\/3676151.3719373","relation":{},"subject":[],"published":{"date-parts":[[2025,5,5]]},"assertion":[{"value":"2025-05-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}