{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:39:37Z","timestamp":1766061577964,"version":"3.48.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11247595","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"11271-11276","source":"Crossref","is-referenced-by-count":0,"title":["ContextCache: Task-Aware Lifecycle Management for Memory-Efficient LLM Agent Deployment"],"prefix":"10.1109","author":[{"given":"Tao","family":"Liu","sequence":"first","affiliation":[{"name":"InchiTech,Nanjing,China"}]},{"given":"Ping","family":"Guo","sequence":"additional","affiliation":[{"name":"Intel Labs China,Beijing,China"}]},{"given":"Dong","family":"Feng","sequence":"additional","affiliation":[{"name":"InchiTech,Nanjing,China"}]},{"given":"Peng","family":"Wang","sequence":"additional","affiliation":[{"name":"Intel Labs China,Beijing,China"}]}],"member":"263","reference":[{"issue":"4","key":"ref1","first-page":"6","volume":"3","author":"Hong","year":"2023","journal-title":"Metagpt: Meta programming for multi-agent collaborative framework"},{"journal-title":"Autogen: Enabling next-gen llm applications via multi-agent conversation","year":"2023","author":"Wu","key":"ref2"},{"author":"Li","key":"ref3","article-title":"Personal llm agents: Insights and survey about the capability, efficiency and security, 2024"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802322"},{"key":"ref5","first-page":"371","article-title":"Understanding large-language model (llm)-powered human-robot interaction","volume-title":"Proceedings of the 2024 ACM\/IEEE international conference on human-robot interaction","author":"Kim"},{"volume":"3","journal-title":"Agentlens: Visual analysis for agent behaviors in llm-based autonomous systems. arxiv, 2024. doi: 10.48550","author":"Lu","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ETFA54631.2023.10275362"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599572"},{"journal-title":"Exploring large language model based intelligent agents: Definitions, methods, and prospects","year":"2024","author":"Cheng","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.cirp.2024.04.002"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISDFS60797.2024.10527275"},{"journal-title":"Agent hospital: A simulacrum of hospital with evolvable medical agents","year":"2024","author":"Li","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2020.103637"},{"journal-title":"Conversational health agents: A personalized llm-powered agent framework. arxiv 2023","year":"2023","author":"Abbasian","key":"ref14"},{"journal-title":"From llms to llm-based agents for software engineering: A survey of current, challenges and future","year":"2024","author":"Jin","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/FLLM63129.2024.10852491"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"journal-title":"Rewoo: Decoupling reasoning from observations for efficient augmented language models","year":"2023","author":"Xu","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4222-0"},{"article-title":"React: Synergizing reasoning and acting in language models","volume-title":"International Conference on Learning Representations (ICLR)","author":"Yao","key":"ref20"},{"journal-title":"Large language model based multi-agents: A survey of progress and challenges","year":"2024","author":"Guo","key":"ref21"},{"author":"Zheng","key":"ref22","article-title":"Sglang: Efficient execution of structured language model programs, 2024"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3696098"},{"key":"ref24","first-page":"325","article-title":"Prompt cache: Modular attention reuse for low-latency inference","volume-title":"Proceedings of Machine Learning and Systems","volume":"6","author":"Gim"},{"first-page":"2023","article-title":"Auto-gpt for online decision making: Benchmarks and additional opinions.(2023)","author":"Yang","key":"ref25"},{"article-title":"Palm-e: An embodied multimodal language model","year":"2023","author":"Driess","key":"ref26"},{"journal-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","year":"2023","author":"Brohan","key":"ref27"},{"journal-title":"Minillm: Knowledge distillation of large language models","year":"2023","author":"Gu","key":"ref28"},{"key":"ref29","first-page":"10 323","article-title":"Sparsegpt: Massive language models can be accurately pruned in one-shot","volume-title":"International Conference on Machine Learning","author":"Frantar"},{"key":"ref30","first-page":"31 094","article-title":"Flexgen: High-throughput generative inference of large language models with a single gpu","volume-title":"International Conference on Machine Learning","author":"Sheng"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.623"},{"key":"ref32","first-page":"929","article-title":"Parrot: Efficient serving of {LLM-based} applications with semantic variable","volume-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24)","author":"Lin"},{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84628-970-5","volume-title":"Graph theory","author":"Bondy","year":"2008"},{"journal-title":"Llama 2: Open foundation and fine-tuned chat models","year":"2023","author":"Touvron","key":"ref34"},{"journal-title":"The refined-web dataset for falcon llm: outperforming curated corpora with web data, and web data only","year":"2023","author":"Penedo","key":"ref35"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11247595.pdf?arnumber=11247595","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:34:38Z","timestamp":1766061278000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11247595\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11247595","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}