{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T17:00:45Z","timestamp":1754154045408,"version":"3.41.2"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819699001"},{"type":"electronic","value":"9789819699018"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9901-8_14","type":"book-chapter","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T07:58:23Z","timestamp":1753257503000},"page":"169-179","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Accelerating Multi\u2011turn LLM Agent Workflows via Context Templating and Opportunistic Prefill"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3897-9820","authenticated-orcid":false,"given":"Hanjing","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1247-2382","authenticated-orcid":false,"given":"Ying","family":"Wen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0127-2425","authenticated-orcid":false,"given":"Weinan","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,24]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lu, J., Jaitly, N.: Probing the multi-turn planning capabilities of LLMs via 20 question games. arXiv preprint arXiv:2310.01468. (2023)","DOI":"10.18653\/v1\/2024.acl-long.82"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Wang, Z., Mao, S., Wu, W., Ge, T., Wei, F., Ji, H.: Unleashing the emergent cognitive synergy in large language models: a task-solving agent through multi-persona self-collaboration. arXiv preprint arXiv:2307.05300. (2023)","DOI":"10.18653\/v1\/2024.naacl-long.15"},{"key":"14_CR3","unstructured":"Chen, W., et al.: Agentverse: Facilitating multi-agent collaboration and exploring emergent behaviors in agents, vol. 2(4), p. 6. arXiv preprint arXiv:2308.10848,\u00a0 (2023)"},{"key":"14_CR4","unstructured":"Du, Y., Li, S., Torralba, A., Tenenbaum, J. B., Mordatch, I.: Improving factuality and reasoning in language models through multiagent debate. In Forty-first International Conference on Machine Learning. (2023)"},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Liang, T., et al.: Encouraging divergent thinking in large language models through multi-agent debate. arXiv preprint arXiv:2305.19118. (2023)","DOI":"10.18653\/v1\/2024.emnlp-main.992"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Zhang, J., Xu, X., Zhang, N., Liu, R., Hooi, B., Deng, S.: Exploring collaboration mechanisms for llm agents: A social psychology view. arXiv preprint arXiv:2310.02124. (2023)","DOI":"10.18653\/v1\/2024.acl-long.782"},{"key":"14_CR7","unstructured":"OpenAI. 2023. ChatGPT. https:\/\/chat.openai.com. Accessed 19 November 2024"},{"key":"14_CR8","unstructured":"OpenAI. 2024. ChatGPT Plugins. https:\/\/openai.com\/index\/chatgpt-plugins\/, Accessed 21 Nov 2024"},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Kwon, D., Lee, S., Kim, K. H., Lee, S., Kim, T., Davis, E.: What, when, and how to ground: designing user persona-aware conversational agents for engaging dialogue. arXiv preprint arXiv:2306.03361. (2023)","DOI":"10.18653\/v1\/2023.acl-industry.68"},{"key":"14_CR10","unstructured":"Moshkovich, D., Mulian, H., Zeltyn, S., Eder, N., Skarbovsky, I., Abitbol, R.: Beyond Black-Box Benchmarking: Observability, Analytics, and Optimization of Agentic Systems. arXiv preprint arXiv:2503.06745. (2025)"},{"key":"14_CR11","unstructured":"Spector, B., Re, C.: Accelerating llm inference with staged speculative decoding. arXiv preprint arXiv:2308.04623. (2023)"},{"key":"14_CR12","unstructured":"Lin, B., et al.: Infinite-llm: Efficient llm service for long context with distattention and distributed kvcache. arXiv preprint arXiv:2401.02669. (2024)"},{"key":"14_CR13","unstructured":"Liu, T., et al.: Groupdebate: enhancing the efficiency of multi-agent debate using group discussion. arXiv preprint arXiv:2409.14051. (2024)"},{"key":"14_CR14","unstructured":"Zhong, Y., et al.: DistServe: disaggregating prefill and decoding for goodput-optimized large language model serving. In 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 2024), pp. 193\u2013210. (2024)"},{"key":"14_CR15","first-page":"62557","volume":"37","author":"L Zheng","year":"2024","unstructured":"Zheng, L., et al.: Sglang: efficient execution of structured language model programs. Adv. Neural. Inf. Process. Syst. 37, 62557\u201362583 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Kwon, W., et al.: Efficient memory management for large language model serving with pagedattention. In: Proceedings of the 29th Symposium on Operating Systems Principles, pp. 611\u2013626 (October 2023)","DOI":"10.1145\/3600006.3613165"},{"key":"14_CR17","unstructured":"Chase, H.: LangChain [Computer software] (2022). https:\/\/github.com\/langchain-ai\/langchain"},{"key":"14_CR18","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR19","unstructured":"Wang, X., et al.: Self-consistency improves chain of thought reasoning in language models. arXiv preprint arXiv:2203.11171. (2022)"},{"key":"14_CR20","unstructured":"Kamath, A. K., Prabhu, R., Mohan, J., Peter, S., Ramjee, R., Panwar, A.: Pod-attention: unlocking full prefill-decode overlap for faster llm inference. arXiv preprint arXiv:2410.18038. (2024)"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Ye, L., Tao, Z., Huang, Y., Li, Y.: Chunkattention: Efficient self-attention with prefix-aware kv cache and two-phase partition. arXiv preprint arXiv:2402.15220. (2024)","DOI":"10.18653\/v1\/2024.acl-long.623"},{"key":"14_CR22","unstructured":"NVIDIA. TensorRT-LLM [Computer software]. https:\/\/github.com\/NVIDIA\/TensorRT-LLM"},{"key":"14_CR23","unstructured":"Wolf, T., et al.: Huggingface's transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771. (2019)"},{"key":"14_CR24","unstructured":"LangChain-AI. LangGraph [Computer software]. https:\/\/github.com\/langchain-ai\/langgraph"},{"key":"14_CR25","unstructured":"Wu, Q., et al.: Autogen: Enabling next-gen llm applications via multi-agent conversation. arXiv preprint arXiv:2308.08155. (2023)"},{"key":"14_CR26","unstructured":"Significant Gravitas. AutoGPT [Computer software]. https:\/\/github.com\/Significant-Gravitas\/AutoGPT"},{"key":"14_CR27","unstructured":"Leviathan, Y., Kalman, M., Matias, Y.: Fast inference from transformers via speculative decoding. In International Conference on Machine Learning, pp. 19274\u201319286. PMLR (July 2023)"},{"key":"14_CR28","unstructured":"Cai, T., et al.: Medusa: Simple llm inference acceleration framework with multiple decoding heads. arXiv preprint arXiv:2401.10774. (2024)"},{"key":"14_CR29","first-page":"64735","volume":"37","author":"D Zhang","year":"2024","unstructured":"Zhang, D., Zhoubian, S., Hu, Z., Yue, Y., Dong, Y., Tang, J.: Rest-mcts*: Llm self-training via process reward guided tree search. Adv. Neural. Inf. Process. Syst. 37, 64735\u201364772 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9901-8_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T22:11:02Z","timestamp":1753308662000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9901-8_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819699001","9789819699018"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9901-8_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}