{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T07:04:55Z","timestamp":1768547095147,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,12]],"date-time":"2024-08-12T00:00:00Z","timestamp":1723420800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Key R&D Program of China","doi-asserted-by":"publisher","award":["2022YFB2901300"],"award-info":[{"award-number":["2022YFB2901300"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"Quan Cheng Laboratory","doi-asserted-by":"publisher","award":["QCLZD202304"],"award-info":[{"award-number":["QCLZD202304"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"Quan Cheng Laboratory","doi-asserted-by":"publisher","award":["SYS202201"],"award-info":[{"award-number":["SYS202201"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2225949"],"award-info":[{"award-number":["CNS-2225949"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2047719"],"award-info":[{"award-number":["CNS-2047719"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Natural Science Foundation of China","award":["62072117"],"award-info":[{"award-number":["62072117"]}]},{"name":"Shanghai Natural Science Foundation","award":["22ZR1407000"],"award-info":[{"award-number":["22ZR1407000"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,12]]},"DOI":"10.1145\/3673038.3673083","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T18:29:01Z","timestamp":1723141741000},"page":"357-366","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Online Scheduling and Pricing for Multi-LoRA Fine-Tuning Tasks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8823-2460","authenticated-orcid":false,"given":"Ying","family":"Zheng","sequence":"first","affiliation":[{"name":"Fudan University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3964-3172","authenticated-orcid":false,"given":"Lei","family":"Jiao","sequence":"additional","affiliation":[{"name":"University of Oregon, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4154-5267","authenticated-orcid":false,"given":"Han","family":"Yang","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5108-8116","authenticated-orcid":false,"given":"Lulu","family":"Chen","sequence":"additional","affiliation":[{"name":"Fudan University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8003-817X","authenticated-orcid":false,"given":"Ying","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0269-3214","authenticated-orcid":false,"given":"Yuxiao","family":"Wang","sequence":"additional","affiliation":[{"name":"Fudan University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4168-3998","authenticated-orcid":false,"given":"Yuedong","family":"Xu","sequence":"additional","affiliation":[{"name":"Fudan University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5753-8096","authenticated-orcid":false,"given":"Xin","family":"Wang","sequence":"additional","affiliation":[{"name":"Fudan University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5351-2075","authenticated-orcid":false,"given":"Zongpeng","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,8,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/1643707"},{"key":"e_1_3_2_2_2_1","unstructured":"Tim Dettmers Artidoro Pagnoni 2023. QLoRA: Efficient Finetuning of Quantized LLMs. In NeurIPS."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01719736"},{"key":"e_1_3_2_2_4_1","volume-title":"Titan: A Scheduler for Foundation Model Fine-tuning Workloads. In ACM SoCC.","author":"Gao Wei","year":"2022","unstructured":"Wei Gao, Peng Sun, 2022. Titan: A Scheduler for Foundation Model Fine-tuning Workloads. In ACM SoCC."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1496091.1496103"},{"key":"e_1_3_2_2_6_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. arXiv preprint arXiv:2106.09685","author":"Hu J","year":"2021","unstructured":"Edward\u00a0J Hu, Yelong Shen, 2021. LoRA: Low-Rank Adaptation of Large Language Models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_2_7_1","volume-title":"Characterization and Prediction of Deep Learning Workloads in Large-Scale GPU Datacenters","author":"Hu Qinghao","unstructured":"Qinghao Hu, Peng Sun, 2021. Characterization and Prediction of Deep Learning Workloads in Large-Scale GPU Datacenters. In ACM\/IEEE SC."},{"key":"e_1_3_2_2_8_1","unstructured":"[8] Hugging Face Fine-Tunig Data Format. [n. d.]. https:\/\/huggingface.co\/docs\/autotrain\/llm_finetuning."},{"key":"e_1_3_2_2_9_1","unstructured":"[9] Hugging Face Models. [n. d.]. https:\/\/huggingface.co\/models."},{"key":"e_1_3_2_2_10_1","unstructured":"[10] Hugging Face Pricing. [n. d.]. https:\/\/huggingface.co\/pricing#spaces."},{"key":"e_1_3_2_2_11_1","unstructured":"Changho Hwang Taehyun Kim 2021. Elastic Resource Sharing for Distributed Deep Learning. In USENIX NSDI."},{"key":"e_1_3_2_2_12_1","unstructured":"Myeongjae Jeon Shivaram Venkataraman 2019. Analysis of Large-Scale Multi-Tenant GPU Clusters for DNN Training Workloads. In USENIX ATC."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1287\/msom.2019.0832"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2955935"},{"key":"e_1_3_2_2_15_1","volume-title":"DoRA: Weight-Decomposed Low-Rank Adaptation. arXiv preprint arXiv:2402.09353","author":"Liu Shih-Yang","year":"2024","unstructured":"Shih-Yang Liu, Chien-Yi Wang, 2024. DoRA: Weight-Decomposed Low-Rank Adaptation. arXiv preprint arXiv:2402.09353 (2024)."},{"key":"e_1_3_2_2_16_1","volume-title":"Themis: Fair and Efficient GPU Cluster Scheduling. In USENIX NSDI.","author":"Mahajan Kshiteej","year":"2020","unstructured":"Kshiteej Mahajan, Arjun Balasubramanian, 2020. Themis: Fair and Efficient GPU Cluster Scheduling. In USENIX NSDI."},{"key":"e_1_3_2_2_17_1","unstructured":"[17] Microsoft Azure Labeling Service. [n. d.]. https:\/\/learn.microsoft.com\/en-us\/azure\/machine-learning\/how-to-outsource-data-labeling?view=azureml-api-2."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2023.3333368"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"e_1_3_2_2_20_1","volume-title":"Pollux: Co-Adaptive Cluster Scheduling for Goodput-Optimized Deep Learning. In USENIX OSDI.","author":"Qiao Aurick","year":"2021","unstructured":"Aurick Qiao, Sang\u00a0Keun Choe, 2021. Pollux: Co-Adaptive Cluster Scheduling for Goodput-Optimized Deep Learning. In USENIX OSDI."},{"key":"e_1_3_2_2_21_1","volume-title":"Minimizing Electricity Cost: Optimization of Distributed Internet Data Centers in a Multi-Electricity-Market Environment","author":"Rao Lei","unstructured":"Lei Rao, Xue Liu, 2010. Minimizing Electricity Cost: Optimization of Distributed Internet Data Centers in a Multi-Electricity-Market Environment. In IEEE INFOCOM."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","unstructured":"Suraiya Tairin Haiying Shen 2023. Embracing Uncertainty for Equity in Resource Allocation in ML Training. In ICPP.","DOI":"10.1145\/3605573.3605583"},{"key":"e_1_3_2_2_23_1","volume-title":"LLAMA 2: Open Foundation and Fine-Tuned Chat Models. arXiv preprint arXiv:2307.09288","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Louis Martin, Kevin Stone, 2023. LLAMA 2: Open Foundation and Fine-Tuned Chat Models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_24_1","unstructured":"Ashish Vaswani Noam Shazeer 2017. Attention is All You Need. In NIPS."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2022.3180772"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Qizhen Weng Wencong Xiao 2022. MLaaS in the wild: Workload analysis and scheduling in Large-Scaleheterogeneous GPU clusters. In USENIX NSDI.","DOI":"10.21203\/rs.3.rs-2266264\/v1"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2015.02.028"},{"key":"e_1_3_2_2_28_1","volume-title":"ASPEN: High-Throughput LoRA Fine-Tuning of Large Language Models with a Single GPU. arXiv preprint arXiv:2312.02515","author":"Ye Zhengmao","year":"2023","unstructured":"Zhengmao Ye, Dengchun Li, 2023. ASPEN: High-Throughput LoRA Fine-Tuning of Large Language Models with a Single GPU. arXiv preprint arXiv:2312.02515 (2023)."},{"key":"e_1_3_2_2_29_1","unstructured":"Qingru Zhang Minshuo Chen 2022. Adaptive Budget Allocation for Parameter-Efficient Fine-Tuning. In ICLR."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","unstructured":"Qin Zhang Ruiting Zhou 2020. Online Scheduling of Heterogeneous Distributed Machine Learning Jobs. In ACM MOBIHOC.","DOI":"10.1145\/3397166.3409128"},{"key":"e_1_3_2_2_31_1","volume-title":"LLaMA-Adapter: Efficient Fine-tuning of Language Models with Zero-init Attention. arXiv preprint arXiv:2303.16199","author":"Zhang Renrui","year":"2023","unstructured":"Renrui Zhang, Jiaming Han, 2023. LLaMA-Adapter: Efficient Fine-tuning of Language Models with Zero-init Attention. arXiv preprint arXiv:2303.16199 (2023)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCC.2022.3143153"}],"event":{"name":"ICPP '24: the 53rd International Conference on Parallel Processing","location":"Gotland Sweden","acronym":"ICPP '24"},"container-title":["Proceedings of the 53rd International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673083","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3673038.3673083","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3673038.3673083","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3673038.3673083","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T17:28:56Z","timestamp":1758648536000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673083"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,12]]},"references-count":32,"alternative-id":["10.1145\/3673038.3673083","10.1145\/3673038"],"URL":"https:\/\/doi.org\/10.1145\/3673038.3673083","relation":{},"subject":[],"published":{"date-parts":[[2024,8,12]]},"assertion":[{"value":"2024-08-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}