{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,11]],"date-time":"2025-01-11T05:33:20Z","timestamp":1736573600301,"version":"3.32.0"},"reference-count":11,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,20]],"date-time":"2024-09-20T00:00:00Z","timestamp":1726790400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,20]],"date-time":"2024-09-20T00:00:00Z","timestamp":1726790400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,20]]},"DOI":"10.1109\/icbase63199.2024.10762029","type":"proceedings-article","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T18:45:22Z","timestamp":1732646722000},"page":"463-467","source":"Crossref","is-referenced-by-count":0,"title":["TSM-LLM: Task Scheduling Management System for Large Language Models"],"prefix":"10.1109","author":[{"given":"Ziqiang","family":"Wen","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications,School of Computer Science,Beijing,China"}]},{"given":"Guoping","family":"Zhu","sequence":"additional","affiliation":[{"name":"China Electronics Cloud Computing Technology Co., Ltd,Cloud Product R&#x0026;D Department,Wuhan,China"}]},{"given":"Yong","family":"Wang","sequence":"additional","affiliation":[{"name":"China Electronics Cloud Computing Technology Co., Ltd,Cloud Product R&#x0026;D Department,Wuhan,China"}]},{"given":"Haijun","family":"Luo","sequence":"additional","affiliation":[{"name":"China Electronics Cloud Computing Technology Co., Ltd,Cloud Product R&#x0026;D Department,Wuhan,China"}]},{"given":"Nianchao","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications,School of 
Computer Science,Beijing,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Language models are few-shot learners","author":"Brown","year":"2020","journal-title":"arXiv preprint arXiv:2005.14165."},{"issue":"240","key":"ref2","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"Journal of Machine Learning Research"},{"key":"ref3","article-title":"The llama 3 herd of models","author":"Dubey","year":"2024","journal-title":"arXiv preprint arXiv:2407.21783."},{"key":"ref4","article-title":"Learned BestEffort LLM Serving","author":"Jha","year":"2024","journal-title":"arXiv preprint arXiv:2401.07886."},{"article-title":"Attention is all you need","volume-title":"Advances in Neural Information Processing Systems.","author":"Vaswani","key":"ref5"},{"key":"ref6","first-page":"521","article-title":"Orca: A distributed serving system for Transformer-Based generative models","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu"},{"issue":"173","key":"ref7","first-page":"2","article-title":"Nginx: the high-performance web server and reverse proxy","volume":"2008","author":"Reese","year":"2008","journal-title":"Linux Journal"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref9","article-title":"Hugging Face (2022) Text Generation Inference"},{"key":"ref10","first-page":"606","article-title":"Efficiently scaling transformer inference","volume-title":"Proceedings of Machine Learning and Systems","volume":"5","author":"Pope"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72643-9_22"}],"event":{"name":"2024 5th International Conference on Big Data &amp; Artificial Intelligence &amp; Software Engineering (ICBASE)","start":{"date-parts":[[2024,9,20]]},"location":"Wenzhou, China","end":{"date-parts":[[2024,9,22]]}},"container-title":["2024 5th 
International Conference on Big Data & Artificial Intelligence & Software Engineering (ICBASE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10761118\/10761986\/10762029.pdf?arnumber=10762029","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,10]],"date-time":"2025-01-10T19:50:03Z","timestamp":1736538603000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10762029\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,20]]},"references-count":11,"URL":"https:\/\/doi.org\/10.1109\/icbase63199.2024.10762029","relation":{},"subject":[],"published":{"date-parts":[[2024,9,20]]}}}