{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T23:40:10Z","timestamp":1768347610470,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,19]]},"DOI":"10.1145\/3772052.3772266","type":"proceedings-article","created":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:19:00Z","timestamp":1768321140000},"page":"847-859","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["C\n                    <scp>uckoo<\/scp>\n                    : Deadline-Aware Job Packing on Heterogeneous GPUs for DL Model Training"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-9736-214X","authenticated-orcid":false,"given":"Yuzheng","family":"Zhang","sequence":"first","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6334-4925","authenticated-orcid":false,"given":"Renyu","family":"Yang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5332-1855","authenticated-orcid":false,"given":"Junhong","family":"Liu","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2101-1302","authenticated-orcid":false,"given":"Weihan","family":"Jiang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6241-8531","authenticated-orcid":false,"given":"Tianyu","family":"Ye","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2635-1541","authenticated-orcid":false,"given":"Yiqiao","family":"Liao","sequence":"additional","affiliation":[{"name":"Kuaishou Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7958-3378","authenticated-orcid":false,"given":"Penghao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Kuaishou Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6203-5260","authenticated-orcid":false,"given":"Tiezi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Kuaishou Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0188-8628","authenticated-orcid":false,"given":"Kun","family":"Shang","sequence":"additional","affiliation":[{"name":"Kuaishou Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5331-3364","authenticated-orcid":false,"given":"Tianyu","family":"Wo","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3473-9703","authenticated-orcid":false,"given":"Chunming","family":"Hu","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3826-8436","authenticated-orcid":false,"given":"Chengru","family":"Song","sequence":"additional","affiliation":[{"name":"Kuaishou Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6591-7717","authenticated-orcid":false,"given":"Jin","family":"Ouyang","sequence":"additional","affiliation":[{"name":"Kuaishou Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126933"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737460"},{"key":"e_1_3_2_1_3_1","first-page":"634","article-title":"Deep learning-based job placement in distributed machine learning clusters with heterogeneous workloads","volume":"31","author":"Bao Yixin","year":"2022","unstructured":"Yixin Bao, Yanghua Peng, and Chuan Wu. 2022. Deep learning-based job placement in distributed machine learning clusters with heterogeneous workloads. IEEE\/ACM Transactions on Networking 31, 2 (2022), 634\u2013647.","journal-title":"IEEE\/ACM Transactions on Networking"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3061394"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171\u20134186."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the ACM Symposium on Cloud Computing. 609\u2013623","author":"Gao Wei","year":"2021","unstructured":"Wei Gao, Zhisheng Ye, Peng Sun, Yonggang Wen, and Tianwei Zhang. 2021. Chronus: A novel deadline-aware scheduler for deep learning training jobs. In Proceedings of the ACM Symposium on Cloud Computing. 609\u2013623."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575721"},{"key":"e_1_3_2_1_8_1","volume-title":"16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Gu Juncheng","year":"2019","unstructured":"Juncheng Gu, Mosharaf Chowdhury, Kang G Shin, Yibo Zhu, Myeongjae Jeon, Junjie Qian, Hongqiang Liu, and Chuanxiong Guo. 2019. Tiresias: A {GPU} cluster manager for distributed deep learning. In 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19). 485\u2013500."},{"key":"e_1_3_2_1_9_1","first-page":"2808","article-title":"Liquid: Intelligent resource estimation and network-efficient scheduling for deep learning jobs on distributed GPU clusters","volume":"33","author":"Gu Rong","year":"2021","unstructured":"Rong Gu, Yuquan Chen, Shuai Liu, Haipeng Dai, Guihai Chen, Kai Zhang, Yang Che, and Yihua Huang. 2021. Liquid: Intelligent resource estimation and network-efficient scheduling for deep learning jobs on distributed GPU clusters. IEEE Transactions on Parallel and Distributed Systems 33, 11 (2021), 2808\u20132820.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476223"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476223"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613175"},{"key":"e_1_3_2_1_14_1","volume-title":"2020 IEEE International Conference on Cluster Computing (CLUSTER). IEEE, 104\u2013108","author":"Kim Sejin","year":"2020","unstructured":"Sejin Kim and Yoonhee Kim. 2020. Co-scheML: Interference-aware container coscheduling scheme using machine learning application profiles for GPU clusters. In 2020 IEEE International Conference on Cluster Computing (CLUSTER). IEEE, 104\u2013108."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the ACM Symposium on Cloud Computing. 61\u201373","author":"Liaw Richard","year":"2019","unstructured":"Richard Liaw, Romil Bhardwaj, Lisa Dunlap, Yitian Zou, Joseph E Gonzalez, Ion Stoica, and Alexey Tumanov. 2019. Hypersched: Dynamic resource reallocation for model development on a deadline. In Proceedings of the ACM Symposium on Cloud Computing. 61\u201373."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545027"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the 2024 ACM Symposium on Cloud Computing. 36\u201351","author":"Liu Ziyang","year":"2024","unstructured":"Ziyang Liu, Renyu Yang, Jin Ouyang, Weihan Jiang, Tianyu Ye, Menghao Zhang, Sui Huang, Jiaming Huang, Chengru Song, Di Zhang, et al. 2024. Kale: Elastic GPU Scheduling for Online DL Model Training. In Proceedings of the 2024 ACM Symposium on Cloud Computing. 36\u201351."},{"key":"e_1_3_2_1_18_1","volume-title":"2019 IEEE Global Communications Conference (GLOBECOM). IEEE, 1\u20137.","author":"Luan Yunteng","year":"2019","unstructured":"Yunteng Luan, Xukun Chen, Hanyu Zhao, Zhi Yang, and Yafei Dai. 2019. SCHED2: Scheduling Deep Learning Training via Deep Reinforcement Learning. In 2019 IEEE Global Communications Conference (GLOBECOM). IEEE, 1\u20137."},{"key":"e_1_3_2_1_19_1","volume-title":"17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20)","author":"Mahajan Kshiteej","year":"2020","unstructured":"Kshiteej Mahajan, Arjun Balasubramanian, Arjun Singhvi, Shivaram Venkataraman, Aditya Akella, Amar Phanishayee, and Shuchi Chawla. 2020. Themis: Fair and efficient {GPU} cluster scheduling. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20). 289\u2013304."},{"key":"e_1_3_2_1_20_1","volume-title":"International conference on machine learning. PmLR","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adria Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PmLR, 1928\u20131937."},{"key":"e_1_3_2_1_21_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3488766.3488793"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the Thirteenth EuroSys Conference. 1\u201317","author":"Park Jun Woo","year":"2018","unstructured":"Jun Woo Park, Alexey Tumanov, Angela Jiang, Michael A Kozuch, and Gregory R Ganger. 2018. 3sigma: distribution-based cluster scheduling for runtime uncertainty. In Proceedings of the Thirteenth EuroSys Conference. 1\u201317."},{"key":"e_1_3_2_1_24_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. 1\u201314","author":"Ranganath Kiran","year":"2021","unstructured":"Kiran Ranganath, Joshua D Suetterlein, Joseph B Manzano, Shuaiwen Leon Song, and Daniel Wong. 2021. Mapa: Multi-accelerator pattern allocation policy for multi-tenant gpu servers. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. 1\u201314."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3642970.3655827"},{"key":"e_1_3_2_1_27_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404415"},{"key":"e_1_3_2_1_29_1","volume-title":"Serving DNN models with multi-instance gpus: A case of the reconfigurable machine scheduling problem. arXiv preprint arXiv:2109.11067","author":"Tan Cheng","year":"2021","unstructured":"Cheng Tan, Zhichao Li, Jian Zhang, Yu Cao, Sikai Qi, Zherui Liu, Yibo Zhu, and Chuanxiong Guo. 2021. Serving DNN models with multi-instance gpus: A case of the reconfigurable machine scheduling problem. arXiv preprint arXiv:2109.11067 (2021)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901355"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386367.3432588"},{"key":"e_1_3_2_1_32_1","volume-title":"Communication contention aware scheduling of multiple deep learning training jobs. arXiv preprint arXiv:2002.10105","author":"Wang Qiang","year":"2020","unstructured":"Qiang Wang, Shaohuai Shi, Canhui Wang, and Xiaowen Chu. 2020. Communication contention aware scheduling of multiple deep learning training jobs. arXiv preprint arXiv:2002.10105 (2020)."},{"key":"e_1_3_2_1_33_1","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. 2022. {MLaaS} in the wild: Workload analysis and scheduling in { Large-Scale} heterogeneous {GPU} clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). 945\u2013960."},{"key":"e_1_3_2_1_34_1","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Xiao Wencong","year":"2018","unstructured":"Wencong Xiao, Romil Bhardwaj, Ramachandran Ramjee, Muthian Sivathanu, Nipun Kwatra, Zhenhua Han, Pratyush Patel, Xuan Peng, Hanyu Zhao, Quanlu Zhang, et al. 2018. Gandiva: Introspective cluster scheduling for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 595\u2013610."},{"key":"e_1_3_2_1_35_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Xiao Wencong","year":"2020","unstructured":"Wencong Xiao, Shiru Ren, Yong Li, Yang Zhang, Pengyang Hou, Zhi Li, Yihui Feng, Wei Lin, and Yangqing Jia. 2020. {AntMan}: Dynamic scaling on {GPU} clusters for deep learning. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). 533\u2013548."},{"key":"e_1_3_2_1_36_1","volume-title":"Hydra: Deadline-aware and efficiency-oriented scheduling for deep learning jobs on heterogeneous gpus","author":"Yang Zichao","year":"2023","unstructured":"Zichao Yang, Heng Wu, Yuanjia Xu, Yuewen Wu, Hua Zhong, and Wenbo Zhang. 2023. Hydra: Deadline-aware and efficiency-oriented scheduling for deep learning jobs on heterogeneous gpus. IEEE Trans. Comput. (2023)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3638757"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3079202"},{"key":"e_1_3_2_1_39_1","volume-title":"Salus: Fine-grained gpu sharing primitives for deep learning applications. arXiv preprint arXiv:1902.04610","author":"Yu Peifeng","year":"2019","unstructured":"Peifeng Yu and Mosharaf Chowdhury. 2019. Salus: Fine-grained gpu sharing primitives for deep learning applications. arXiv preprint arXiv:1902.04610 (2019)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544224"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2020.102590"}],"event":{"name":"SoCC '25: ACM Symposium on Cloud Computing","location":"Online USA","acronym":"SoCC '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2025 ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772052.3772266","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:21:47Z","timestamp":1768321307000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772052.3772266"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":41,"alternative-id":["10.1145\/3772052.3772266","10.1145\/3772052"],"URL":"https:\/\/doi.org\/10.1145\/3772052.3772266","relation":{},"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"2026-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}