{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T05:03:42Z","timestamp":1773378222162,"version":"3.50.1"},"reference-count":61,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2024A1515011333"],"award-info":[{"award-number":["2024A1515011333"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong Provincial Young Innovative Talent Program"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Netw. Serv. Manage."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/tnsm.2026.3663316","type":"journal-article","created":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T21:05:09Z","timestamp":1770757509000},"page":"2528-2542","source":"Crossref","is-referenced-by-count":0,"title":["REACH: Reinforcement Learning for Efficient Allocation in Community and Heterogeneous Networks"],"prefix":"10.1109","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-7776-4985","authenticated-orcid":false,"given":"Zhiwei","family":"Yu","sequence":"first","affiliation":[{"name":"Computer Science and Control Engineering, Shenzhen University of Advanced Technology, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5313-7750","authenticated-orcid":false,"given":"Chengze","family":"Du","sequence":"additional","affiliation":[{"name":"Computer Science and Control Engineering, Shenzhen University of Advanced Technology, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Heng","family":"Xu","sequence":"additional","affiliation":[{"name":"Computer Science and Control Engineering, Shenzhen University of Advanced Technology, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ying","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Electronic Information Engineering, Beijing Jiaotong University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8695-7342","authenticated-orcid":false,"given":"Bo","family":"Liu","sequence":"additional","affiliation":[{"name":"Computer Science and Control Engineering, Shenzhen University of Advanced Technology, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3416-5551","authenticated-orcid":false,"given":"Jialong","family":"Li","sequence":"additional","affiliation":[{"name":"Computer Science and Control Engineering, Shenzhen University of Advanced Technology, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Pricing for GPU Instances","year":"2025"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10723-019-09497-9"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/581571.581573"},{"key":"ref4","article-title":"PaLM 2 technical report","author":"Anil","year":"2023","journal-title":"arXiv:2305.10403"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW52791.2021.00144"},{"key":"ref6","volume-title":"How Much VRAM Do You Need for Blender?","author":"Bach","year":"2024"},{"key":"ref7","volume-title":"The Untapped Potential of Idle GPUs","author":"Bains","year":"2025"},{"key":"ref8","volume-title":"Slurm on Kubernetes","year":"2021"},{"key":"ref9","article-title":"Language models are few-shot learners","author":"Brown","year":"2020","journal-title":"arXiv:2005.14165"},{"key":"ref10","first-page":"1","article-title":"Gandivafair: A fair GPU cluster scheduler for deep learning workloads","volume-title":"Proc. 15th Eur. Conf. Comput. Syst.","author":"Chaudhary"},{"key":"ref11","first-page":"3010","article-title":"Elastic deep learning in multi-tenant GPU clusters","volume":"32","author":"Cheng","year":"2021","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"ref12","article-title":"GPU-as-a-service, harvesting idle capacity, and the rise of alternative processing units","author":"Chong","year":"2025"},{"key":"ref13","article-title":"PaLM: Scaling language modeling with pathways","author":"Chowdhery","year":"2022","journal-title":"arXiv:2204.02311"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3731569.3764818"},{"key":"ref15","article-title":"The promise of analog deep learning: Recent advances, challenges and opportunities","author":"Datar","year":"2024","journal-title":"arXiv:2406.12911"},{"key":"ref16","article-title":"QLoRA: Efficient finetuning of quantized LLMs","author":"Dettmers","year":"2023","journal-title":"arXiv:2305.14314"},{"key":"ref17","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref18","article-title":"Temporal-aware GPU resource allocation for distributed LLM inference via reinforcement learning","author":"Du","year":"2025","journal-title":"arXiv:2507.10259"},{"key":"ref19","article-title":"PLATONT: Learning a platonic representation for unified network tomography","author":"Du","year":"2025","journal-title":"arXiv:2511.15251"},{"key":"ref20","article-title":"Measuring GPU utilization one level deeper","author":"Elvinger","year":"2025","journal-title":"arXiv:2501.16909"},{"key":"ref21","first-page":"109","article-title":"Scheduling strategies for BOINC projects: A case study with SimBA","volume-title":"Proc. 2nd IEEE Int. Conf. E-Sci. Grid Comput. (E-Sci.)","author":"Estrada"},{"key":"ref22","first-page":"189","article-title":"Topology-aware GPU scheduling for learning workloads in cloud environments","volume-title":"Proc. IEEE\/ACM Int. Conf. Utility Cloud Comput. (UCC)","author":"Garcia"},{"key":"ref23","article-title":"The llama 3 herd of models","author":"Grattafiori","year":"2024","journal-title":"arXiv:2407.21783"},{"key":"ref24","article-title":"Deep reinforcement learning for job scheduling and resource management in cloud computing: An algorithm-level review","author":"Gu","year":"2025","journal-title":"arXiv:2501.01007"},{"key":"ref25","first-page":"1","article-title":"Network simulations with the NS-3 simulator","volume-title":"Proc. ACM CoNEXT Conf.","author":"Henderson"},{"key":"ref26","first-page":"85","article-title":"ARK: GPU-driven execution for distributed deep learning","volume-title":"Proc. 20th USENIX Symp. Networked Syst. Design Implement. (NSDI)","author":"Hwang"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-024-04893-7"},{"key":"ref28","first-page":"947","article-title":"Analysis of large-scale multi-tenant GPU clusters for dnn training workloads","volume-title":"Proc. USENIX Annu. Tech. Conf. (ATC)","author":"Jeon"},{"issue":"11","key":"ref29","doi-asserted-by":"crossref","first-page":"4697","DOI":"10.3390\/app14114697","article-title":"A genetic algorithm-based scheduling method for optimizing GPU utilization in multi-tenant cloud environments","volume":"14","author":"Kim","year":"2024","journal-title":"Appl. Sci."},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-earth-040809-152348"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/b978-0-12-820488-7.00025-6"},{"issue":"1","key":"ref32","first-page":"153","article-title":"Volunteer computing on mobile devices: State of the art and future research directions","volume":"7","author":"L\u2019Hanafi","year":"2016","journal-title":"Enabling Real-Time Mobile Cloud Computing Through Emerging Technologies"},{"key":"ref33","article-title":"Optimizing mixture-of-experts inference time combining model deployment and communication scheduling","author":"Li","year":"2024","journal-title":"arXiv:2410.17043"},{"issue":"11","key":"ref34","first-page":"3194","article-title":"Astraea: A fair deep learning scheduler for multi-tenant GPU clusters","volume":"33","author":"Li","year":"2022","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/comst.2021.3106401"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IWQoS61813.2024.10682877"},{"key":"ref37","first-page":"63","article-title":"Themis: Fair and efficient GPU cluster scheduling for ML training","volume-title":"Proc. 17th USENIX Symp. Networked Syst. Design Implement.","author":"Mahajan"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3721145.3728488"},{"key":"ref39","first-page":"481","article-title":"Gavel: Heterogeneity-aware cluster scheduling policies for deep learning workloads","volume-title":"Proc. 14th USENIX Symp. Operating Syst. Design Implement.","author":"Narayanan"},{"key":"ref40","article-title":"GPT-4 technical report","author":"Achiam","year":"2023","journal-title":"arXiv:2303.08774"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3563766.3564096"},{"key":"ref42","article-title":"Robust speech recognition via large-scale weak supervision","author":"Radford","year":"2022","journal-title":"arXiv:2212.04356"},{"key":"ref43","first-page":"1","article-title":"CASSINI: Network-aware job scheduling for ML training","volume-title":"Proc. 21st USENIX Symp. Networked Syst. Design Implement.","author":"Rajasekaran"},{"key":"ref44","article-title":"High-resolution image synthesis with latent diffusion models","author":"Rombach","year":"2021","journal-title":"arXiv:2112.10752"},{"key":"ref45","first-page":"1","article-title":"Towards topology aware pre-emptive job scheduling with deep reinforcement learning","volume-title":"Proc. 30th Annu. Int. Conf. Comput. Sci. Softw. Eng.","author":"Ryu"},{"key":"ref46","article-title":"DistilBERT, a distilled version of BERT: Smaller, faster, cheaper and lighter","author":"Sanh","year":"2019","journal-title":"arXiv:1910.01108"},{"key":"ref47","article-title":"Will LLMs scaling hit the wall? Breaking barriers via distributed resources on massive edge devices","author":"Shen","year":"2025","journal-title":"arXiv:2503.08223"},{"key":"ref48","article-title":"SDXL: Improving latent diffusion models for high-resolution image synthesis","volume-title":"arXiv:2307.01952","author":"Podell","year":"2023"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629578"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00066"},{"key":"ref51","volume-title":"Saladcloud: Rent and Share GPUS","year":"2025"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/71.993206"},{"key":"ref53","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv:2302.13971"},{"key":"ref54","volume-title":"GPU Marketplace Offerings Data, 2025","year":"2025"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/3645102"},{"key":"ref56","first-page":"993","article-title":"Taming GPU fragmentation in large-scale ML clusters with fragmentation gradient descent","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Weng"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/2774993.2775012"},{"key":"ref58","first-page":"847","article-title":"GPU-disaggregated serving for deep learning recommendation models at scale","volume-title":"Proc. 22nd USENIX Symp. Networked Syst. Design Implement. (NSDI)","author":"Yang"},{"key":"ref59","first-page":"383","article-title":"Salus: Fine-grained GPU sharing primitives for deep learning applications","volume-title":"Proc. 3rd MLSys Conf.","author":"Yu"},{"issue":"10","key":"ref60","first-page":"2846","article-title":"TAG: An automatic framework for topology-aware and heterogeneity-aware distributed DNN training","volume":"34","author":"Zhang","year":"2023","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"ref61","article-title":"A survey on scheduling in deep learning systems","author":"Zheng","year":"2022","journal-title":"arXiv:2205.11913"}],"container-title":["IEEE Transactions on Network and Service Management"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/4275028\/11319294\/11390693.pdf?arnumber=11390693","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:36:54Z","timestamp":1773347814000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11390693\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":61,"URL":"https:\/\/doi.org\/10.1109\/tnsm.2026.3663316","relation":{},"ISSN":["1932-4537","2373-7379"],"issn-type":[{"value":"1932-4537","type":"electronic"},{"value":"2373-7379","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}