{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T09:17:54Z","timestamp":1773911874872,"version":"3.50.1"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Open J. Comput. Soc."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/ojcs.2026.3667549","type":"journal-article","created":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T21:00:26Z","timestamp":1771966826000},"page":"560-573","source":"Crossref","is-referenced-by-count":0,"title":["LLM-Driven Adaptive Cloud Resource Scheduling: Bridging Reasoning Intelligence With Optimization Guarantees"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-2201-732X","authenticated-orcid":false,"given":"Guanyu","family":"Ding","sequence":"first","affiliation":[{"name":"New York University, New York, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8173-5120","authenticated-orcid":false,"given":"Shiyu","family":"Yang","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5165-9264","authenticated-orcid":false,"given":"Han","family":"Lin","sequence":"additional","affiliation":[{"name":"University of Wisconsin\u2013Madison, Madison, WI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5460-8519","authenticated-orcid":false,"given":"Zifan","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Pennsylvania, Philadelphia, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9826-4252","authenticated-orcid":false,"given":"Jie 
Si","family":"Yang","sequence":"additional","affiliation":[{"name":"The University of Utah, Salt Lake City, UT, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/1721654.1721672"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s13174-010-0007-6"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-01741-4"},{"key":"ref4","first-page":"14","article-title":"Taxonomy and survey of scheduling algorithms in cloud computing","volume":"53","author":"Delgado","year":"2015","journal-title":"J. Netw. Comput. Appl."},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2741948.2741964"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2465351.2465386"},{"key":"ref7","first-page":"295","article-title":"Mesos: A platform for fine-grained resource sharing in the data center","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Hindman","year":"2011"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/2523616.2523633"},{"key":"ref9","first-page":"99","article-title":"Firmament: Fast, centralized cluster scheduling at scale","volume-title":"Proc. USENIX Symp. Operating Syst. Des. Implementation","author":"Gog","year":"2016"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3132747.3132772"},{"key":"ref11","first-page":"65","article-title":"Altruistic scheduling in multi-resource clusters","volume-title":"Proc. USENIX Symp. Operating Syst. Des. Implementation","author":"Grandl","year":"2016"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3005745.3005750"},{"key":"ref13","first-page":"1460","article-title":"Deep learning with long short-term memory networks for cloud workload prediction","volume-title":"Proc. IEEE Int. Conf. Distrib. Comput. 
Syst.","author":"Zhang","year":"2018"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/tpds.2021.3052895"},{"issue":"4","key":"ref15","first-page":"1","article-title":"Understanding data storage and ingestion for large-scale deep learning training workloads","volume":"17","author":"Chen","year":"2021","journal-title":"ACM Trans. Storage"},{"key":"ref16","first-page":"936","article-title":"Sage: Practical and scalable ML-driven performance debugging in microservices","volume-title":"Proc. ACM Int. Conf. Architectural Support Program. Lang. Operating Syst.","author":"Zheng","year":"2022"},{"key":"ref17","first-page":"945","article-title":"MLaaS in the wild: Workload analysis and scheduling in large-scale heterogeneous GPU clusters","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Weng","year":"2022"},{"key":"ref18","article-title":"GPT-4 technical report","year":"2023","journal-title":"OpenAI"},{"key":"ref19","article-title":"The Claude 3 model family: Opus, Sonnet, Haiku","volume-title":"Anthropic","year":"2024"},{"key":"ref20","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref21","article-title":"Sparks of artificial general intelligence: Early experiments with GPT-4","author":"Bubeck","year":"2023"},{"key":"ref22","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Brown","year":"2020"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.64"},{"key":"ref24","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Wei","year":"2022"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"ref26","article-title":"Evaluating large language models trained on code","author":"Chen","year":"2021"},{"key":"ref27","first-page":"405","article-title":"LLMAO: Optimizing cloud systems with large language models","volume-title":"Proc. Workshop Mach. Learn. Syst. NeurIPS","author":"Chen","year":"2023"},{"key":"ref28","article-title":"Large language models for workflow optimization","author":"Yang","year":"2023"},{"key":"ref29","first-page":"75993","article-title":"On the planning abilities of large language models\u2014A critical investigation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Valmeekam","year":"2023"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541941"},{"key":"ref31","article-title":"Position: LLMs can\u2019t plan, but can help planning in LLM-modulo frameworks","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Kambhampati","year":"2024"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.938"},{"key":"ref33","article-title":"Kubernetes: Production-grade container orchestration","volume-title":"Kubernetes","year":"2024"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2890784"},{"key":"ref35","first-page":"323","article-title":"Dominant resource fairness: Fair allocation of multiple resource types","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Ghodsi","year":"2011"},{"key":"ref36","first-page":"1","article-title":"Tetris: Multi-resource packing for cluster schedulers","volume-title":"Proc. ACM Eur. Conf. Comput. Syst.","author":"Tumanov","year":"2014"},{"key":"ref37","first-page":"444","article-title":"Resource bundling for parallel jobs: Tradeoffs and algorithms","volume-title":"Proc. IEEE\/ACM Int. Symp. 
Cluster Cloud Grid Comput.","author":"Tang","year":"2016"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3341302.3342080"},{"key":"ref39","first-page":"363","article-title":"Ernest: Efficient performance prediction for large-scale advanced analytics","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Venkataraman","year":"2016"},{"key":"ref40","first-page":"127","article-title":"Neural network based resource allocation in cloud computing environments","volume-title":"Proc. IEEE Int. Conf. Cloud Comput.","author":"Zhang","year":"2019"},{"key":"ref41","first-page":"609","article-title":"Chronus: A novel deadline-aware scheduler for deep learning training jobs","volume-title":"Proc. ACM Symp. Cloud Comput.","author":"Wu","year":"2021"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446693"},{"issue":"3","key":"ref43","first-page":"1876","article-title":"Uncertainty quantification for deep learning-based resource management","volume":"10","author":"Hu","year":"2021","journal-title":"IEEE Trans. Cloud Comput."},{"key":"ref44","article-title":"The next decade in AI: Four steps towards robust artificial intelligence","author":"Marcus","year":"2022"},{"key":"ref45","first-page":"148","article-title":"Large language models for cloud systems: Opportunities and challenges","volume-title":"Proc. Workshop Hot Topics Operating Syst.","author":"Kim","year":"2023"},{"key":"ref46","first-page":"985","article-title":"Cilantro: Performance-aware resource allocation for general objectives via online feedback","volume-title":"Proc. USENIX Symp. Operating Syst. Des. Implementation","author":"Narayanan","year":"2020"},{"key":"ref47","first-page":"339","article-title":"Harmony: Overcoming the hurdles of GPU memory capacity to train massive DNN models on commodity servers","volume-title":"Proc. USENIX Annu. Tech. 
Conf.","author":"Liu","year":"2022"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1126\/science.abq1158"},{"key":"ref49","first-page":"4850","article-title":"D-Bot: Database diagnosis system using large language models","volume-title":"Proc. ACM Int. Conf. Inf. Knowl. Manage.","author":"Ding","year":"2023"},{"key":"ref50","first-page":"2335","article-title":"Large language models for traffic signal control","volume-title":"Proc. Workshop Large Lang. Models Auton. Driving NeurIPS","author":"Ji","year":"2023"},{"key":"ref51","article-title":"Can foundation models help us achieve perfect query optimization?","author":"Liu","year":"2023"},{"key":"ref52","first-page":"33","article-title":"Automatic configuration tuning with large language models","volume-title":"Proc. Workshop ML Syst. NeurIPS","author":"Wang","year":"2023"},{"key":"ref53","article-title":"AutoAdmin: Automatic database administration with large language models","author":"Fan","year":"2023"},{"key":"ref54","article-title":"On the tool manipulation capability of open-source large language models","volume-title":"Proc. NeurIPS Found. Models Decis. Mak. Workshop","author":"Xu","year":"2023"},{"key":"ref55","article-title":"TrustLLM: Trustworthiness in large language models","author":"Liu","year":"2024"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1016\/S0022-0000(75)80008-0"},{"issue":"114","key":"ref57","article-title":"Google cluster-usage traces: Format + schema","volume":"1","author":"Reiss","year":"2011","journal-title":"Google Inc., White Paper"},{"issue":"1","key":"ref58","first-page":"1","article-title":"The Borg job and machine event traces","volume":"48","author":"Wilkes","year":"2020","journal-title":"ACM SIGMETRICS Perform. Eval. 
Rev."},{"key":"ref59","volume-title":"Computers and Intractability: A Guide to the Theory of NP-Completeness","author":"Garey","year":"1979"}],"container-title":["IEEE Open Journal of the Computer Society"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8782664\/11319293\/11409427.pdf?arnumber=11409427","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T04:51:27Z","timestamp":1773895887000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11409427\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":59,"URL":"https:\/\/doi.org\/10.1109\/ojcs.2026.3667549","relation":{},"ISSN":["2644-1268"],"issn-type":[{"value":"2644-1268","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}