{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T21:56:47Z","timestamp":1757627807296,"version":"3.44.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T00:00:00Z","timestamp":1751414400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T00:00:00Z","timestamp":1751414400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFB4505904"],"award-info":[{"award-number":["2024YFB4505904"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272495"],"award-info":[{"award-number":["62272495"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,2]]},"DOI":"10.1109\/iwqos65803.2025.11143449","type":"proceedings-article","created":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T17:29:52Z","timestamp":1757438992000},"page":"1-10","source":"Crossref","is-referenced-by-count":0,"title":["LLMConf: Knowledge-Enhanced Configuration Optimization for Large Language Model Inference"],"prefix":"10.1109","author":[{"given":"Jingkai","family":"He","sequence":"first","affiliation":[{"name":"School of Systems Science and Engineering, Sun Yat-sen University,Guangzhou,China"}]},{"given":"Pengfei","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"}]},{"given":"Yilun","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Systems Science and Engineering, Sun Yat-sen University,Guangzhou,China"}]},{"given":"Haiyu","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"}]},{"given":"Chuanfu","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Systems Science and Engineering, Sun Yat-sen University,Guangzhou,China"}]},{"given":"Haojia","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"}]},{"given":"Danwen","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Language models are few-shot learners","author":"Brown","year":"2020","journal-title":"arXiv preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3604930.3605705"},{"key":"ref3","volume-title":"vllm","year":"2024"},{"key":"ref4","article-title":"Uellm: A unified and efficient approach for llm inference serving","author":"He","year":"2024","journal-title":"ICSOC"},{"key":"ref5","article-title":"Slo-aware gpu frequency scaling for energy efficient 1 lm inference serving","author":"Kakolyris","year":"2024","journal-title":"arXiv preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545018"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPWRS.2012.2223241"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1080\/23311916.2018.1502242"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.624"},{"key":"ref10","article-title":"Tpot: A tree-based pipeline optimization tool for automating machine learning","volume-title":"AutoML.","author":"Olson","year":"2016"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/tevc.2013.2281535"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2013.2281534"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3696410.3714930"},{"key":"ref14","volume-title":"Tensorrt-1lm","year":"2024"},{"key":"ref15","article-title":"B-vllm: A vision large language model with balanced spatio-temporal tokens","author":"Lu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-023-04071-1"},{"key":"ref18","article-title":"Revisiting slo and goodput metrics in llm serving","author":"Wang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517882"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2012.6227196"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2017.8115661"},{"key":"ref22","volume-title":"Meta-Llama-3","year":"2024"},{"key":"ref23","author":"Team","year":"2024","journal-title":"Qwen2.5: A party of foundation models"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.315"},{"key":"ref25","article-title":"A novel approach to emotion classification with llama3-8b: Integrating lora for efficient training","author":"Nishat","year":"2024","journal-title":"Aitoz Multidisciplinary Review"},{"key":"ref26","article-title":"Cjeval: A benchmark for assessing large language models using chinese junior high school exam data","author":"Zhang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref27","article-title":"Bert: Pretraining of deep bidirectional transformers for language understanding","volume-title":"Proceedings of naacL-HLT","author":"Kenton","year":"2019"},{"key":"ref28","article-title":"Taming throughputlatency tradeoff in llm inference with sarathi-serve","author":"Agrawal","year":"2024","journal-title":"arXiv preprint"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3300085"},{"key":"ref30","first-page":"2494","article-title":"To tune or not to tune? in search of optimal configurations for data analytics","author":"Fekry","year":"2020","journal-title":"SIGKDD"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155363"}],"event":{"name":"2025 IEEE\/ACM International Symposium on Quality of Service (IWQoS)","location":"Gold Coast, Australia","start":{"date-parts":[[2025,7,2]]},"end":{"date-parts":[[2025,7,4]]}},"container-title":["2025 IEEE\/ACM 33rd International Symposium on Quality of Service (IWQoS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11143240\/11143247\/11143449.pdf?arnumber=11143449","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T05:04:43Z","timestamp":1757480683000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11143449\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,2]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/iwqos65803.2025.11143449","relation":{},"subject":[],"published":{"date-parts":[[2025,7,2]]}}}