{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T15:44:53Z","timestamp":1768405493410,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,29]],"date-time":"2022-08-29T00:00:00Z","timestamp":1661731200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2019YFB1704102"],"award-info":[{"award-number":["2019YFB1704102"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,29]]},"DOI":"10.1145\/3545008.3545027","type":"proceedings-article","created":{"date-parts":[[2023,1,15]],"date-time":"2023-01-15T01:04:08Z","timestamp":1673744648000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Adaptive and Efficient GPU Time Sharing for Hyperparameter Tuning in Cloud"],"prefix":"10.1145","author":[{"given":"Liu","family":"Liu","sequence":"first","affiliation":[{"name":"Tongji University, China"}]},{"given":"Jian","family":"Yu","sequence":"additional","affiliation":[{"name":"Tongji University, China"}]},{"given":"Zhijun","family":"Ding","sequence":"additional","affiliation":[{"name":"Tongji University, China"}]}],"member":"320","published-online":{"date-parts":[[2023,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387555"},{"key":"e_1_3_2_1_2_1","unstructured":"[2] Wikitext-2 Dataset.2022. https:\/\/www.salesforce.com\/products\/einstein\/ai-research\/the-wikitext-dependency-language-modeling-dataset"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the USENIX Symposium on NSDI. 485\u2013500","author":"Gu Juncheng","year":"2019","unstructured":"Juncheng Gu, Mosharaf Chowdhury, Kang\u00a0G Shin, Yibo Zhu, Myeongjae Jeon, Junjie Qian, Hongqiang Liu, and Chuanxiong Guo. 2019. Tiresias: A GPU cluster manager for distributed deep learning. In Proceedings of the USENIX Symposium on NSDI. 485\u2013500."},{"key":"e_1_3_2_1_5_1","volume-title":"The collected works of Wassily Hoeffding","author":"Hoeffding Wassily","unstructured":"Wassily Hoeffding. 1994. Probability inequalities for sums of bounded random variables. In The collected works of Wassily Hoeffding. Springer, 409\u2013426."},{"key":"e_1_3_2_1_6_1","unstructured":"Safwan Hossain Evi Micha and Nisarg Shah. 2020. Fair Algorithms for Multi-Agent Multi-Armed Bandits. arXiv preprint arXiv:2007.06699(2020)."},{"key":"e_1_3_2_1_7_1","unstructured":"Max Jaderberg Valentin Dalibard Simon Osindero Wojciech\u00a0M Czarnecki Jeff Donahue Ali Razavi Oriol Vinyals Tim Green Iain Dunning Karen Simonyan 2017. Population based training of neural networks. arXiv preprint arXiv:1711.09846(2017)."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the USENIX ATC. 947\u2013960","author":"Jeon Myeongjae","year":"2019","unstructured":"Myeongjae Jeon, Shivaram Venkataraman, Amar Phanishayee, Junjie Qian, Wencong Xiao, and Fan Yang. 2019. Analysis of large-scale multi-tenant GPU clusters for DNN training workloads. In Proceedings of the USENIX ATC. 947\u2013960."},{"key":"e_1_3_2_1_9_1","unstructured":"Jared Kaplan Sam McCandlish Tom Henighan Tom\u00a0B Brown Benjamin Chess Rewon Child Scott Gray Alec Radford Jeffrey Wu and Dario Amodei. 2020. Scaling laws for neural language models. arXiv preprint arXiv:2001.08361(2020)."},{"key":"e_1_3_2_1_10_1","unstructured":"[10] Katib.2022. https:\/\/github.com\/kubeflow\/katib"},{"key":"e_1_3_2_1_11_1","unstructured":"[11] Kubeflow.2022. https:\/\/github.com\/kubeflow\/kubeflow"},{"key":"e_1_3_2_1_12_1","unstructured":"[12] Kubernetes.2022. https:\/\/github.com\/kubernetes\/kubernetes"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the MLSys. 230\u2013246","author":"Li Liam","year":"2020","unstructured":"Liam Li, Kevin Jamieson, Afshin Rostamizadeh, Ekaterina Gonina, Jonathan Ben-Tzur, Moritz Hardt, Benjamin Recht, and Ameet Talwalkar. 2020. A System for Massively Parallel Hyperparameter Tuning. In Proceedings of the MLSys. 230\u2013246."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the ICLR.","author":"Li Lisha","year":"2017","unstructured":"Lisha Li, Kevin\u00a0G Jamieson, Giulia DeSalvo, Afshin Rostamizadeh, and Ameet Talwalkar. 2017. Hyperband: Bandit-based configuration evaluation for hyperparameter optimization. In Proceedings of the ICLR."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the VLDB. 607\u2013620","author":"Li Tian","year":"2018","unstructured":"Tian Li, Jie Zhong, Ji Liu, Wentao Wu, and Ce Zhang. 2018. Ease. ml: Towards multi-tenant resource sharing for machine learning workloads. In Proceedings of the VLDB. 607\u2013620."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357223.3362719"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the USENIX ATC. 161\u2013175","author":"Lim Gangmuk","year":"2021","unstructured":"Gangmuk Lim, Jeongseob Ahn, Wencong Xiao, Youngjin Kwon, and Myeongjae Jeon. 2021. Zico: Efficient GPU Memory Sharing for Concurrent DNN Training. In Proceedings of the USENIX ATC. 161\u2013175."},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the USENIX Symposium on OSDI. 937\u2013954","author":"Mai Luo","year":"2020","unstructured":"Luo Mai, Guo Li, Marcel Wagenl\u00e4nder, Konstantinos Fertakis, Andrei-Octavian Brabete, and Peter Pietzuch. 2020. KungFu: Making Training in Distributed Machine Learning Adaptive. In Proceedings of the USENIX Symposium on OSDI. 937\u2013954."},{"key":"e_1_3_2_1_19_1","volume-title":"Fair division and collective welfare","author":"Moulin Herv\u00e9","unstructured":"Herv\u00e9 Moulin. 2003. Fair division and collective welfare. MIT press."},{"key":"e_1_3_2_1_20_1","unstructured":"[20] Gemini open source.2022. https:\/\/github.com\/NTHU-LSALAB\/Gemini"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3135974.3135994"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Aleksandrs Slivkins. 2019. Introduction to multi-armed bandits. arXiv preprint arXiv:1904.07272(2019).","DOI":"10.1561\/9781680836219"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2806777.2806945"},{"key":"e_1_3_2_1_25_1","unstructured":"[25] Microsoft\u00a0Philly Trace.2022. https:\/\/github.com\/msr-fiddle\/philly-traces"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737583"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00094"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015289"},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the USENIX Symposium on OSDI. 595\u2013610","author":"Xiao Wencong","year":"2018","unstructured":"Wencong Xiao, Romil Bhardwaj, Ramachandran Ramjee, Muthian Sivathanu, Nipun Kwatra, Zhenhua Han, Pratyush Patel, Xuan Peng, Hanyu Zhao, Quanlu Zhang, 2018. Gandiva: Introspective cluster scheduling for deep learning. In Proceedings of the USENIX Symposium on OSDI. 595\u2013610."},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the USENIX Symposium on OSDI. 533\u2013548","author":"Xiao Wencong","year":"2020","unstructured":"Wencong Xiao, Shiru Ren, Yong Li, Yang Zhang, Pengyang Hou, Zhi Li, Yihui Feng, Wei Lin, and Yangqing Jia. 2020. AntMan: Dynamic Scaling on GPU Clusters for Deep Learning. In Proceedings of the USENIX Symposium on OSDI. 533\u2013548."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3369583.3392679"},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of the MLSys. 98\u2013111","author":"Yu Peifeng","year":"2020","unstructured":"Peifeng Yu and Mosharaf Chowdhury. 2020. Fine-grained GPU sharing primitives for deep learning applications. In Proceedings of the MLSys. 98\u2013111."},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the MLSys. 502\u2013516","author":"Yu Peifeng","year":"2021","unstructured":"Peifeng Yu, Jiachen Liu, and Mosharaf Chowdhury. 2021. Fluid: Resource-aware Hyperparameter Tuning Engine. In Proceedings of the MLSys. 502\u2013516."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the USENIX Symposium on OSDI. 515\u2013532","author":"Zhao Hanyu","year":"2020","unstructured":"Hanyu Zhao, Zhenhua Han, Zhi Yang, Quanlu Zhang, Fan Yang, Lidong Zhou, Mao Yang, Francis\u00a0CM Lau, Yuqi Wang, Yifan Xiong, 2020. Hived: sharing a GPU cluster for deep learning with guarantees. In Proceedings of the USENIX Symposium on OSDI. 515\u2013532."}],"event":{"name":"ICPP '22: 51st International Conference on Parallel Processing","location":"Bordeaux France","acronym":"ICPP '22"},"container-title":["Proceedings of the 51st International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545027","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3545008.3545027","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:43Z","timestamp":1750186963000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545027"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,29]]},"references-count":34,"alternative-id":["10.1145\/3545008.3545027","10.1145\/3545008"],"URL":"https:\/\/doi.org\/10.1145\/3545008.3545027","relation":{},"subject":[],"published":{"date-parts":[[2022,8,29]]},"assertion":[{"value":"2023-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}