{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:39:51Z","timestamp":1766219991614,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","funder":[{"name":"the National Natural Science Foundation of China","award":["No. 62372330"],"award-info":[{"award-number":["No. 62372330"]}]},{"name":"the Shenzhen Science and Technology Plan Project of China","award":["CJGJZD20240729113801003"],"award-info":[{"award-number":["CJGJZD20240729113801003"]}]},{"name":"Special Fund of Fundamental Scientific Research Business Expense for Higher School of Central Government","award":["No. 22120240563"],"award-info":[{"award-number":["No. 22120240563"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754634","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"43-52","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CoreTuner: Predicting and Scheduling Framework for Optimizing the Joint Allocation of CPU and GPU in Training Cluster"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-4092-4096","authenticated-orcid":false,"given":"Hao","family":"Dong","sequence":"first","affiliation":[{"name":"Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4172-8084","authenticated-orcid":false,"given":"Yuehao","family":"Xu","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1281-7987","authenticated-orcid":false,"given":"Xiaohui","family":"Wang","sequence":"additional","affiliation":[{"name":"UCloud Technology Co., Ltd, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9885-4000","authenticated-orcid":false,"given":"Xinhua","family":"Ji","sequence":"additional","affiliation":[{"name":"UCloud Technology Co., Ltd, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2178-6201","authenticated-orcid":false,"given":"Zhijun","family":"Ding","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3302"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","unstructured":"Brendan Burns Brian Grant David Oppenheimer Eric Brewer and John Wilkes. 2016. Borg Omega and Kubernetes: Lessons learned from three container-management systems over a decade. Queue 14 1 (Jan. 2016) 70\u201393. 10.1145\/2898442.2898444","DOI":"10.1145\/2898442.2898444"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673134"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","unstructured":"Mark Everingham Luc Gool Christopher\u00a0K. Williams John Winn and Andrew Zisserman. 2010. The Pascal Visual Object Classes (VOC) Challenge. Int. J. Comput. Vision 88 2 (June 2010) 303\u2013338. 10.1007\/s11263-009-0275-4","DOI":"10.1007\/s11263-009-0275-4"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP58684.2023.00039"},{"key":"e_1_3_3_1_7_2","unstructured":"Awni Hannun Carl Case Jared Casper Bryan Catanzaro Greg Diamos Erich Elsen Ryan Prenger Sanjeev Satheesh Shubho Sengupta Adam Coates and Andrew\u00a0Y. Ng. 2014. Deep Speech: Scaling up end-to-end speech recognition. arxiv:https:\/\/arXiv.org\/abs\/1412.5567\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1412.5567"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","unstructured":"F.\u00a0Maxwell Harper and Joseph\u00a0A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Trans. Interact. Intell. Syst. 5 4 Article 19 (Dec. 2015) 19\u00a0pages. 10.1145\/2827872","DOI":"10.1145\/2827872"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3038912.3052569"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"S Hochreiter and J Schmidhuber. 1997. Long Short-Term Memory. Neural Computation 9 8 (1997) 1735\u20131780.","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476223"},{"key":"e_1_3_3_1_13_2","first-page":"947","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Jeon Myeongjae","year":"2019","unstructured":"Myeongjae Jeon, Shivaram Venkataraman, Amar Phanishayee, Junjie Qian, Wencong Xiao, and Fan Yang. 2019. Analysis of Large-Scale Multi-Tenant GPU Clusters for DNN Training Workloads. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). USENIX Association, Renton, WA, 947\u2013960. https:\/\/www.usenix.org\/conference\/atc19\/presentation\/jeon"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.5555\/3358807.3358888"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622396"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"John Kominek and Alan Black. 2004. The CMU Arctic speech databases. SSW5-2004 (01 2004).","DOI":"10.1038\/sj.ebd.6400232"},{"key":"e_1_3_3_1_17_2","volume-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky Alex","year":"2009","unstructured":"Alex Krizhevsky. 2009. Learning multiple layers of features from tiny images. Technical Report."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","unstructured":"Alex Krizhevsky Ilya Sutskever and Geoffrey\u00a0E. Hinton. 2017. ImageNet classification with deep convolutional neural networks. Commun. ACM 60 6 (May 2017) 84\u201390. 10.1145\/3065386","DOI":"10.1145\/3065386"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387547"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS51616.2021.00085"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.5555\/2685048.2685095"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","unstructured":"Shen Li Yanli Zhao Rohan Varma Omkar Salpekar Pieter Noordhuis Teng Li Adam Paszke Jeff Smith Brian Vaughan Pritam Damania and Soumith Chintala. 2020. PyTorch distributed: experiences on accelerating data parallel training. Proc. VLDB Endow. 13 12 (Aug. 2020) 3005\u20133018. 10.14778\/3415478.3415530","DOI":"10.14778\/3415478.3415530"},{"key":"e_1_3_3_1_23_2","series-title":"(OSDI\u201920)","volume-title":"Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation","author":"Narayanan Deepak","year":"2020","unstructured":"Deepak Narayanan, Keshav Santhanam, Fiodar Kazhamiaka, Amar Phanishayee, and Matei Zaharia. 2020. Heterogeneity-aware cluster scheduling policies for deep learning workloads. In Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation(OSDI\u201920). USENIX Association, USA, Article 27, 18\u00a0pages."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"e_1_3_3_1_25_2","first-page":"1","volume-title":"15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21)","author":"Qiao Aurick","year":"2021","unstructured":"Aurick Qiao, Sang\u00a0Keun Choe, Suhas\u00a0Jayaram Subramanya, Willie Neiswanger, Qirong Ho, Hao Zhang, Gregory\u00a0R. Ganger, and Eric\u00a0P. Xing. 2021. Pollux: Co-adaptive Cluster Scheduling for Goodput-Optimized Deep Learning. In 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21). USENIX Association, 1\u201318. https:\/\/www.usenix.org\/conference\/osdi21\/presentation\/qiao"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSCA57840.2023.10087604"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","unstructured":"Shaoqing Ren Kaiming He Ross Girshick and Jian Sun. 2017. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. IEEE Transactions on Pattern Analysis and Machine Intelligence 39 6 (2017) 1137\u20131149. 10.1109\/TPAMI.2016.2577031","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_3_1_28_2","unstructured":"Dharma Shukla Muthian Sivathanu Srinidhi Viswanatha Bhargav\u00a0S. Gulavani Rimma Nehme Amey Agrawal Chen Chen Nipun Kwatra Ramachandran Ramjee Pankaj Sharma Atul Katiyar Vipul Modi Vaibhav Sharma Abhishek Singh Shreshth Singhal Kaustubh Welankar Lu Xun Ravi Anupindi Karthik Elangovan Hasibur Rahman Zhou Lin Rahul Seetharaman Cheng Xu Eddie Ailijiang Suresh Krishnappa and Mark Russinovich. 2022. Singularity: Planet-Scale Preemptive and Elastic Scheduling of AI Workloads. CoRR abs\/2202.07848 (2022). arXiv:https:\/\/arXiv.org\/abs\/2202.07848https:\/\/arxiv.org\/abs\/2202.07848"},{"key":"e_1_3_3_1_29_2","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. arxiv:https:\/\/arXiv.org\/abs\/1409.1556\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1409.1556"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/2523616.2523633"},{"key":"e_1_3_3_1_33_2","first-page":"533","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Xiao Wencong","year":"2020","unstructured":"Wencong Xiao, Shiru Ren, Yong Li, Yang Zhang, Pengyang Hou, Zhi Li, Yihui Feng, Wei Lin, and Yangqing Jia. 2020. AntMan: Dynamic Scaling on GPU Clusters for Deep Learning. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, 533\u2013548. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/xiao"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673089"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","unstructured":"Han Zhao Weihao Cui Quan Chen Jingwen Leng Deze Zeng and Minyi Guo. 2023. Improving Cluster Utilization Through Adaptive Resource Management for Deep Neural Network and CPU Jobs Colocation. IEEE Trans. Comput. 72 12 (2023) 3458\u20133472. 10.1109\/TC.2023.3303988","DOI":"10.1109\/TC.2023.3303988"}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754634","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:37:18Z","timestamp":1766219838000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754634"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":34,"alternative-id":["10.1145\/3754598.3754634","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754634","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}