{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T11:27:35Z","timestamp":1777462055614,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3805621.3807627","type":"proceedings-article","created":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:08:45Z","timestamp":1777381725000},"page":"192-199","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Asynchronous Verified Semantic Caching for Tiered LLM Architectures"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3313-4518","authenticated-orcid":false,"given":"Asmit Kumar","family":"Singh","sequence":"first","affiliation":[{"name":"Apple, Cupertino, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5024-9575","authenticated-orcid":false,"given":"Haozhe","family":"Wang","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3964-0872","authenticated-orcid":false,"given":"Laxmi Naga Santosh","family":"Attaluri","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5921-6954","authenticated-orcid":false,"given":"Tak","family":"Chiam","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5180-9435","authenticated-orcid":false,"given":"Weihua","family":"Zhu","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,4,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad et al. 2023. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL] https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"e_1_3_2_1_2_1","article-title":"Design Trade-offs for Search Engine Caching","volume":"2","author":"Baeza-Yates Ricardo","year":"2008","unstructured":"Ricardo Baeza-Yates, Aristides Gionis, Flavio P. Junqueira, Vanessa Murdock, Vassilis Plachouras, and Fabrizio Silvestri. 2008. Design Trade-offs for Search Engine Caching. ACM Transactions on the Web 2, 4 (2008), 20:1\u201320:28.","journal-title":"ACM Transactions on the Web"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.nlposs-1.24"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1145\/1125857.1125859","article-title":"Boosting the Performance of Web Search Engines: Caching and Prefetching Query Results by Exploiting Historical Usage Data","volume":"24","author":"Fagni Tiziano","year":"2006","unstructured":"Tiziano Fagni, Raffaele Perego, Fabrizio Silvestri, and Salvatore Orlando. 2006. Boosting the Performance of Web Search Engines: Caching and Prefetching Query Results by Exploiting Historical Usage Data. ACM Transactions on Information Systems 24, 1 (2006), 51\u201378.","journal-title":"ACM Transactions on Information Systems"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Waris Gill Mohamed Elidrisi Pallavi Kalapatapu Ammar Ahmed Ali Anwar and Muhammad Ali Gulzar. 2024. MeanCache: User-Centric Semantic Caching for LLM Web Services. arXiv:2403.02694 [cs.CL] https:\/\/arxiv.org\/abs\/2403.02694","DOI":"10.1109\/IPDPS64566.2025.00117"},{"key":"e_1_3_2_1_6_1","unstructured":"Jiawei Gu Xuhui Jiang Zhichao Shi Hexiang Tan Xuehao Zhai Chengjin Xu Wei Li Yinghan Shen Shengjie Ma Honghao Liu Saizhuo Wang Kun Zhang Yuanzhuo Wang Wen Gao Lionel Ni and Jian Guo. 2024. A Survey on LLM-as-a-Judge. arXiv:2411.15594 [cs.CL] https:\/\/arxiv.org\/abs\/2411.15594"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_8_1","volume-title":"SCALM: Towards Semantic Caching for Automated Chat Services with Large Language Models. arXiv:2406.00025 [cs.CL] https:\/\/arxiv.org\/abs\/2406.00025","author":"Li Jiaxing","year":"2024","unstructured":"Jiaxing Li, Chen Xu, Fali Wang, Ignaz Riedemann, Chao Zhang, and Jiangchuan Liu. 2024. SCALM: Towards Semantic Caching for Automated Chat Services with Large Language Models. arXiv:2406.00025 [cs.CL] https:\/\/arxiv.org\/abs\/2406.00025"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1016\/j.ject.2024.08.007","article-title":"LLM Technologies and Information Search","volume":"2","author":"Liu Lin","year":"2024","unstructured":"Lin Liu, Jiajun Meng, and Yongliang Yang. 2024. LLM Technologies and Information Search. Journal of Economy and Technology 2 (2024), 269\u2013277.","journal-title":"Journal of Economy and Technology"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","first-page":"102193","DOI":"10.1016\/j.ipm.2019.102193","article-title":"Topical Result Caching in Web Search Engines","volume":"57","author":"Mele Ilaria","year":"2020","unstructured":"Ilaria Mele, Nicola Tonellotto, Ophir Frieder, and Raffaele Perego. 2020. Topical Result Caching in Web Search Engines. Information Processing & Management 57, 3 (2020), 102193.","journal-title":"Information Processing & Management"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-024-00864-x"},{"key":"e_1_3_2_1_12_1","volume-title":"Advances in Information Retrieval: 39th European Conference on IR Research (ECIR 2017)","author":"Rekabsaz Navid","year":"2017","unstructured":"Navid Rekabsaz, Mihai Lupu, and Allan Hanbury. 2017. Exploration of a Threshold for Similarity Based on Uncertainty in Word Embedding. In Advances in Information Retrieval: 39th European Conference on IR Research (ECIR 2017) (Lecture Notes in Computer Science). Springer, Cham, Switzerland, 396\u2013409."},{"key":"e_1_3_2_1_13_1","volume-title":"Ion Stoica Matei Zaharia, and Joseph E. Gonzalez","author":"Schroeder Luis Gaspar","year":"2025","unstructured":"Luis Gaspar Schroeder, Aditya Desai, Alejandro Cuadron, Kyle Chu, Shu Liu, Mark Zhao, Stephan Krusche, Alfons Kemper, Ion Stoica Matei Zaharia, and Joseph E. Gonzalez. 2025. vCache: Verified Semantic Prompt Caching. arXiv:2502.03771 [cs.IR] https:\/\/arxiv.org\/abs\/2502.03771"},{"key":"e_1_3_2_1_14_1","volume-title":"Raluca Ada Popa, and Ion Stoica","author":"Tan Sijun","year":"2024","unstructured":"Sijun Tan, Siyuan Zhuang, Kyle Montgomery, William Y. Tang, Alejandro Cuadron, Chenguang Wang, Raluca Ada Popa, and Ion Stoica. 2024. JudgeBench: A Benchmark for Evaluating LLM-Based Judges. arXiv:2410.12784 [cs.AI] https:\/\/arxiv.org\/abs\/2410.12784"},{"key":"e_1_3_2_1_15_1","unstructured":"Lei Wang Chen Ma Xueyang Feng Zeyu Zhang Hao Yang et al. 2023. A Survey on Large Language Model Based Autonomous Agents. arXiv:2308.11432 [cs.AI] https:\/\/arxiv.org\/abs\/2308.11432"},{"key":"e_1_3_2_1_16_1","volume-title":"ACM SIGIR Forum","volume":"57","author":"Wang Liang","year":"2024","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Rangan Majumder, and Furu Wei. 2024. Large Search Model: Redefining Search Stack in the Era of LLMs. In ACM SIGIR Forum, Vol. 57. ACM, New York, NY, USA, 1\u201316."},{"key":"e_1_3_2_1_17_1","volume-title":"When Search Engine Services Meet Large Language Models: Visions and Challenges. IEEE Transactions on Services Computing","author":"Xiong Haoyi","year":"2024","unstructured":"Haoyi Xiong, Jiang Bian, Yuchen Li, Xuhong Li, Mengnan Du, Shuaiqiang Wang, Dawei Yin, and Sumi Helal. 2024. When Search Engine Services Meet Large Language Models: Visions and Challenges. IEEE Transactions on Services Computing to appear (2024), xx-yy."},{"key":"e_1_3_2_1_18_1","unstructured":"Qingyun Zeng Simin Ma Arash Niknafs Ashish Basran and Carol Szabo. 2025. Taming SQL Complexity: LLM-Based Equivalence Evaluation for Text-to-SQL. arXiv:2506.09359 [cs.CL] https:\/\/arxiv.org\/abs\/2506.09359"},{"key":"e_1_3_2_1_19_1","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong Yifan Du Chen Yang Yushuo Chen Zhipeng Chen Jinhao Jiang Ruiyang Ren Yifan Li Xinyu Tang Zikang Liu Peiyu Liu Jian-Yun Nie and Ji-Rong Wen. 2023. A Survey of Large Language Models. arXiv:2303.18223 [cs.CL] https:\/\/arxiv.org\/abs\/2303.18223"},{"key":"e_1_3_2_1_20_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Zheng Lianmin","year":"2023","unstructured":"Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zi Lin, Zhuohan Li, Dacheng Li, Eric Xing, et al. 2023. Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena. In Advances in Neural Information Processing Systems, Vol. 36. Curran Associates, Inc., Red Hook, NY, USA, 46595\u201346623."},{"key":"e_1_3_2_1_21_1","unstructured":"Hanlin Zhu Banghua Zhu and Jiantao Jiao. 2024. Efficient Prompt Caching via Embedding Similarity. arXiv:2402.01173 [cs.CL] https:\/\/arxiv.org\/abs\/2402 01173"},{"key":"e_1_3_2_1_22_1","volume-title":"GPTCache: Semantic Cache for LLMs. https:\/\/github.com\/zilliztech\/GPTCache Accessed","author":"Tech Zilliz","year":"2025","unstructured":"Zilliz Tech. 2023. GPTCache: Semantic Cache for LLMs. https:\/\/github.com\/zilliztech\/GPTCache Accessed 28 November 2025."}],"event":{"name":"EuroSys '26: 21st European Conference on Computer Systems","location":"Edinburgh Scotland Uk","acronym":"EuroMLSys '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Sixth European Workshop on Machine Learning and Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3805621.3807627","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T13:16:16Z","timestamp":1777382176000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805621.3807627"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,27]]},"references-count":22,"alternative-id":["10.1145\/3805621.3807627","10.1145\/3805621"],"URL":"https:\/\/doi.org\/10.1145\/3805621.3807627","relation":{},"subject":[],"published":{"date-parts":[[2026,4,27]]},"assertion":[{"value":"2026-04-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}