{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T07:54:38Z","timestamp":1775634878644,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819584017","type":"print"},{"value":"9789819584024","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-8402-4_14","type":"book-chapter","created":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T07:18:37Z","timestamp":1775632717000},"page":"267-287","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DeFragS: Mitigating Resource Fragmentation in\u00a0GPU Clusters Through Spatial-Temporal Scheduling"],"prefix":"10.1007","author":[{"given":"Haifeng","family":"Ni","sequence":"first","affiliation":[]},{"given":"Bowen","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhuozhao","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,4,9]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Silver, D., et\u00a0al.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017)","DOI":"10.1038\/nature24270"},{"issue":"2","key":"14_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3485128","volume":"55","author":"D Rolnick","year":"2022","unstructured":"Rolnick, D., et al.: Tackling climate change with machine learning. ACM Comput. Surv. (CSUR) 55(2), 1\u201396 (2022)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 3389\u20133396. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"14_CR4","unstructured":"Jeon, M., Venkataraman, S., Phanishayee, A., Qian, J., Xiao, W., Yang, F.: Analysis of $$\\{$$Large-Scale$$\\}$$$$ \\{$$Multi-Tenant$$\\}$$$$\\{$$GPU$$\\}$$ clusters for $$\\{$$DNN$$\\}$$ training workloads. In: 2019 USENIX Annual Technical Conference (USENIX ATC 19), pp. 947\u2013960 (2019)"},{"key":"14_CR5","unstructured":"Weng, Q., et al.: $$\\{$$MLaaS$$\\}$$ in the wild: workload analysis and scheduling in $$\\{$$Large-Scale$$\\}$$ heterogeneous $$\\{$$GPU$$\\}$$ clusters. In: 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22), pp. 945\u2013960 (2022)"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Hu, Q., Sun, P., Yan, S., Wen, Y., Zhang, T.: Characterization and prediction of deep learning workloads in large-scale GPU datacenters. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201315 (2021)","DOI":"10.1145\/3458817.3476223"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Luo, L., Nelson, J., Ceze, L., Phanishayee, A., Krishnamurthy, A.: Parameter hub: a rack-scale parameter server for distributed deep neural network training. In: Proceedings of the ACM Symposium on Cloud Computing, pp. 41\u201354 (2018)","DOI":"10.1145\/3267809.3267840"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Feitelson, D.G.: Packing schemes for gang scheduling. In: Workshop on Job Scheduling Strategies for Parallel Processing, pp. 89\u2013110. Springer (1996)","DOI":"10.1007\/BFb0022289"},{"key":"14_CR9","unstructured":"Gu, J., et al.: Tiresias: a $$\\{$$GPU$$\\}$$ cluster manager for distributed deep learning. In: 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19), pp. 485\u2013500 (2019)"},{"key":"14_CR10","unstructured":"Xiao, W., et\u00a0al.: Gandiva: introspective cluster scheduling for deep learning. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pp. 595\u2013610 (2018)"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Luan, Y., Chen, X., Zhao, H., Yang, Z., Dai, Y.: Sched$$^2$$: scheduling deep learning training via deep reinforcement learning. In: 2019 IEEE Global Communications Conference (GLOBECOM), pp.\u00a01\u20137. IEEE (2019)","DOI":"10.1109\/GLOBECOM38437.2019.9014110"},{"key":"14_CR12","unstructured":"Weng, Q., et al.: Beware of fragmentation: scheduling $$\\{$$GPU-Sharing$$\\}$$ workloads with fragmentation gradient descent. In: 2023 USENIX Annual Technical Conference (USENIX ATC 23), pp. 995\u20131008 (2023)"},{"key":"14_CR13","unstructured":"Hadary, O., et\u00a0al.: Protean:$$\\{$$VM$$\\}$$ allocation service at scale. In: 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20), pp. 845\u2013861 (2020)"},{"issue":"4","key":"14_CR14","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1145\/2740070.2626334","volume":"44","author":"R Grandl","year":"2014","unstructured":"Grandl, R., Ananthanarayanan, G., Kandula, S., Rao, S., Akella, A.: Multi-resource packing for cluster schedulers. ACM SIGCOMM Comput. Commun. Rev. 44(4), 455\u2013466 (2014)","journal-title":"ACM SIGCOMM Comput. Commun. Rev."},{"key":"14_CR15","unstructured":"Zhao, H., et\u00a0al.: $$\\{$$HiveD$$\\}$$: sharing a $$\\{$$GPU$$\\}$$ cluster for deep learning with guarantees. In: 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20), pp. 515\u2013532 (2020)"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Vavilapalli, V.K., et\u00a0al.: Apache hadoop yarn: yet another resource negotiator. In: Proceedings of the 4th annual Symposium on Cloud Computing, pp. 1\u201316 (2013)","DOI":"10.1145\/2523616.2523633"},{"key":"14_CR17","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.J., Shenker, S., Stoica, I.: Spark: cluster computing with working sets. In: 2nd USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 10) (2010)"},{"key":"14_CR18","volume-title":"Connection scheduling in web servers","author":"ME Crovella","year":"1999","unstructured":"Crovella, M.E., Frangioso, R., Harchol-Balter, M.: Connection scheduling in web servers. Tech. rep, Citeseer (1999)"},{"issue":"12","key":"14_CR19","first-page":"42","volume":"5","author":"M Hamayun","year":"2015","unstructured":"Hamayun, M., Khurshid, H.: An optimized shortest job first scheduling algorithm for CPU scheduling. J. Appl. Environ. Biol. Sci. 5(12), 42\u201346 (2015)","journal-title":"J. Appl. Environ. Biol. Sci."},{"key":"14_CR20","unstructured":"Lee, S., et al.: Validating heuristics for virtual machines consolidation. Microsoft Research, MSR-TR-2011-9, pp. 1\u201314 (2011)"},{"key":"14_CR21","unstructured":"Panigrahy, R., Talwar, K., Uyeda, L., Wieder, U.: Heuristics for vector bin packing. research. microsoft.com (2011)"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Sultana, A., Chen, L., Xu, F., Yuan, X.: E-las: design and analysis of completion-time agnostic scheduling for distributed deep learning cluster. In: Proceedings of the 49th International Conference on Parallel Processing, pp. 1\u201311 (2020)","DOI":"10.1145\/3404397.3404415"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Peng, Y., Bao, Y., Chen, Y., Wu, C., Guo, C.: Optimus: an efficient dynamic resource scheduler for deep learning clusters. In: Proceedings of the Thirteenth EuroSys Conference, pp. 1\u201314 (2018)","DOI":"10.1145\/3190508.3190517"},{"key":"14_CR24","unstructured":"Shukla, D., et\u00a0al.: Singularity: planet-scale, preemptive and elastic scheduling of ai workloads. arXiv preprint arXiv:2202.07848 (2022)"},{"issue":"3","key":"14_CR25","doi-asserted-by":"publisher","first-page":"212","DOI":"10.4103\/0256-4602.81230","volume":"28","author":"A Murtazaev","year":"2011","unstructured":"Murtazaev, A., Oh, S.: Sercon: server consolidation algorithm using live migration of virtual machines for green computing. IETE Tech. Rev. 28(3), 212\u2013231 (2011)","journal-title":"IETE Tech. Rev."},{"key":"14_CR26","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1016\/j.future.2014.09.009","volume":"50","author":"KS Rao","year":"2015","unstructured":"Rao, K.S., Thilagam, P.S.: Heuristics based server consolidation with residual resource defragmentation in cloud data centers. Futur. Gener. Comput. Syst. 50, 87\u201398 (2015)","journal-title":"Futur. Gener. Comput. Syst."},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Zhu, J., et al.: PHECON: fine-grained VM consolidation with nimble resource defragmentation in public cloud platforms. In: Proceedings of the 53rd International Conference on Parallel Processing, pp. 712\u2013721 (2024)","DOI":"10.1145\/3673038.3673139"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-8402-4_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T07:18:47Z","timestamp":1775632727000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-8402-4_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819584017","9789819584024"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-8402-4_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"9 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhengzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ieee-cybermatics.org\/2025\/ica3pp\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}