{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T16:42:43Z","timestamp":1759941763746,"version":"3.28.0"},"reference-count":9,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,19]],"date-time":"2024-06-19T00:00:00Z","timestamp":1718755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,19]],"date-time":"2024-06-19T00:00:00Z","timestamp":1718755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,19]]},"DOI":"10.1109\/iwqos61813.2024.10682929","type":"proceedings-article","created":{"date-parts":[[2024,9,26]],"date-time":"2024-09-26T17:41:00Z","timestamp":1727372460000},"page":"1-2","source":"Crossref","is-referenced-by-count":1,"title":["Towards Efficient Compound Large Language Model System Serving in the Wild"],"prefix":"10.1109","author":[{"given":"Yifei","family":"Zhu","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University,UM-SJTU Joint Institute"}]},{"given":"Botao","family":"Zhu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,UM-SJTU Joint Institute"}]},{"given":"Chen","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,John Hopcroft Center"}]},{"given":"Xiaoyi","family":"Fan","sequence":"additional","affiliation":[{"name":"Jiangxing Intelligence Inc"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.34133\/icomputing.0063"},{"article-title":"Large language models for supply chain optimization","year":"2023","author":"Li","key":"ref2"},{"article-title":"Llama: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref3"},{"article-title":"Autogen: Enabling next-gen llm applications via multi-agent conversation framework","year":"2023","author":"Wu","key":"ref4"},{"article-title":"Evaluating large language models trained on code","year":"2021","author":"Chen","key":"ref5"},{"key":"ref6","article-title":"Hugginggpt: Solving ai tasks with chatgpt and its friends in hugging face","volume-title":"Proc. NeurIPS","volume":"36","author":"Shen"},{"article-title":"Taskbench: Benchmarking large language models for task automation","year":"2023","author":"Shen","key":"ref7"},{"key":"ref8","article-title":"Response length perception and sequence scheduling: An llm-empowered llm inference pipeline","volume-title":"Proc. NeurIPS","volume":"36","author":"Zheng"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29720"}],"event":{"name":"2024 IEEE\/ACM 32nd International Symposium on Quality of Service (IWQoS)","start":{"date-parts":[[2024,6,19]]},"location":"Guangzhou, China","end":{"date-parts":[[2024,6,21]]}},"container-title":["2024 IEEE\/ACM 32nd International Symposium on Quality of Service (IWQoS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10682818\/10682608\/10682929.pdf?arnumber=10682929","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,27]],"date-time":"2024-09-27T04:40:49Z","timestamp":1727412049000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10682929\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,19]]},"references-count":9,"URL":"https:\/\/doi.org\/10.1109\/iwqos61813.2024.10682929","relation":{},"subject":[],"published":{"date-parts":[[2024,6,19]]}}}