{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T04:43:26Z","timestamp":1770353006031,"version":"3.49.0"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031781827","type":"print"},{"value":"9783031781834","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78183-4_15","type":"book-chapter","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T12:01:51Z","timestamp":1733227311000},"page":"232-248","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["waLLMartCache: A Distributed, Multi-tenant and Enhanced Semantic Caching System for LLMs"],"prefix":"10.1007","author":[{"given":"Soumik","family":"Dasgupta","sequence":"first","affiliation":[]},{"given":"Anurag","family":"Wagh","sequence":"additional","affiliation":[]},{"given":"Lalitdutt","family":"Parsai","sequence":"additional","affiliation":[]},{"given":"Binay","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"Geet","family":"Vudata","sequence":"additional","affiliation":[]},{"given":"Shally","family":"Sangal","sequence":"additional","affiliation":[]},{"given":"Sohom","family":"Majumdar","sequence":"additional","affiliation":[]},{"given":"Hema","family":"Rajesh","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0605-630X","authenticated-orcid":false,"given":"Kunal","family":"Banerjee","sequence":"additional","affiliation":[]},{"given":"Anirban","family":"Chatterjee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,4]]},"reference":[{"key":"15_CR1","unstructured":"Agarwal, S., Mitra, S., Chakraborty, S., Karanam, S., Mukherjee, K., Saini, S.K.: Approximate caching for efficiently serving text-to-image diffusion models. In: USENIX NSDI (2024)"},{"key":"15_CR2","unstructured":"Almazrouei, E., Alobeidli, H., Alshamsi, A., Cappelli, A., Cojocaru, R., Debbah, M., Goffinet, \u00c9., Hesslow, D., Launay, J., Malartic, Q., Mazzotta, D., Noune, B., Pannier, B., Penedo, G.: The falcon series of open language models. CoRR abs\/2311.16867 (2023)"},{"key":"15_CR3","unstructured":"Anil, R., Borgeaud, S., Wu, Y., Alayrac, J., Yu, J., Soricut, R., Schalkwyk, J., Dai, A.M., Hauth, A., Millican, K., Silver, D., Petrov, S., Johnson, M., Antonoglou, I., Schrittwieser, J., Glaese, A., Chen, J., Pitler, E., Lillicrap, T.P., Lazaridou, A., Firat, O., Molloy, J., Isard, M., Barham, P.R., Hennigan, T., Lee, B., Viola, F., Reynolds, M., Xu, Y., Doherty, R., Collins, E., Meyer, C., Rutherford, E., Moreira, E., Ayoub, K., Goel, M., Tucker, G., Piqueras, E., Krikun, M., Barr, I., Savinov, N., Danihelka, I., Roelofs, B., White, A., Andreassen, A., von Glehn, T., Yagati, L., Kazemi, M., Gonzalez, L., Khalman, M., Sygnowski, J., et\u00a0al.: Gemini: A family of highly capable multimodal models. CoRR abs\/2312.11805 (2023)"},{"key":"15_CR4","unstructured":"Anthropic: Introducing Claude. https:\/\/www.anthropic.com\/news\/introducing-claude, accessed: 2024-04-05"},{"key":"15_CR5","doi-asserted-by":"publisher","unstructured":"Bang, F.: GPTCache: An open-source semantic cache for LLM applications enabling faster answers and cost savings. In: Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023). pp. 212\u2013218. Association for Computational Linguistics (2023). https:\/\/doi.org\/10.18653\/v1\/2023.nlposs-1.24","DOI":"10.18653\/v1\/2023.nlposs-1.24"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Barnett, S., Kurniawan, S., Thudumu, S., Brannelly, Z., Abdelrazek, M.: Seven failure points when engineering a retrieval augmented generation system. In: CAIN. pp. 194\u2013199 (2024)","DOI":"10.1145\/3644815.3644945"},{"key":"15_CR7","unstructured":"Bengio, Y., Ducharme, R., Vincent, P.: A neural probabilistic language model. In: NeurIPS (2000)"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Bertsimas, D., Perakis, G.: Dynamic Pricing: A Learning Approach, pp. 45\u201379. Springer US (2006)","DOI":"10.1007\/0-387-29645-X_3"},{"key":"15_CR9","unstructured":"Brown, T.B., Mann, B., Ryder, N., Subbiah, M., Kaplan, J., Dhariwal, P., Neelakantan, A., Shyam, P., Sastry, G., Askell, A., Agarwal, S., Herbert-Voss, A., Krueger, G., Henighan, T., Child, R., Ramesh, A., Ziegler, D.M., Wu, J., Winter, C., Hesse, C., Chen, M., Sigler, E., Litwin, M., Gray, S., Chess, B., Clark, J., Berner, C., McCandlish, S., Radford, A., Sutskever, I., Amodei, D.: Language models are few-shot learners. In: NeurIPS (2020)"},{"key":"15_CR10","doi-asserted-by":"crossref","unstructured":"Cheng, K., Hu, W., Wang, Z., Du, P., Li, J., Zhang, S.: Enabling efficient batch serving for lmaas via generation length prediction. CoRR abs\/2406.04785 (2024)","DOI":"10.1109\/ICWS62655.2024.00104"},{"key":"15_CR11","unstructured":"Dar, S., Franklin, M.J., J\u00f3nsson, B.\u00de., Srivastava, D., Tan, M.: Semantic data caching and replacement. In: VLDB. pp. 330\u2013341 (1996)"},{"key":"15_CR12","unstructured":"Databricks: Dolly. https:\/\/github.com\/databrickslabs\/dolly, accessed: 2024-04-05"},{"key":"15_CR13","unstructured":"Gallegos, I.O., Rossi, R.A., Barrow, J., Tanjim, M.M., Kim, S., Dernoncourt, F., Yu, T., Zhang, R., Ahmed, N.K.: Bias and fairness in large language models: A survey. CoRR abs\/2309.00770 (2023)"},{"key":"15_CR14","unstructured":"Gill, W., Elidrisi, M., Kalapatapu, P., Anwar, A., Gulzar, M.A.: Privacy-aware semantic cache for large language models. CoRR abs\/2403.02694 (2024)"},{"key":"15_CR15","volume-title":"The cache memory book","author":"J Handy","year":"1993","unstructured":"Handy, J.: The cache memory book. Academic Press Professional, Inc (1993)"},{"key":"15_CR16","unstructured":"Jiang, A.Q., Sablayrolles, A., Mensch, A., Bamford, C., Chaplot, D.S., de\u00a0Las\u00a0Casas, D., Bressand, F., Lengyel, G., Lample, G., Saulnier, L., Lavaud, L.R., Lachaux, M., Stock, P., Scao, T.L., Lavril, T., Wang, T., Lacroix, T., Sayed, W.E.: Mistral 7b. CoRR abs\/2310.06825 (2023)"},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Lee, D., Chu, W.W.: Semantic caching via query matching for web sources. In: CIKM. pp. 77\u201385 (1999)","DOI":"10.1145\/319950.319960"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Li, J., Xu, C., Wang, F., von Riedemann, I.M., Zhang, C., Liu, J.: SCALM: Towards semantic caching for automated chat services with large language models. CoRR abs\/2406.00025 (2024)","DOI":"10.1109\/IWQoS61813.2024.10682957"},{"issue":"4","key":"15_CR19","doi-asserted-by":"publisher","first-page":"574","DOI":"10.14778\/3574245.3574246","volume":"16","author":"M Mazmudar","year":"2022","unstructured":"Mazmudar, M., Humphries, T., Liu, J., Rafuse, M., He, X.: Cache me if you can: Accuracy-aware inference engine for differentially private data exploration. Proc. VLDB Endow. 16(4), 574\u2013586 (2022)","journal-title":"Proc. VLDB Endow."},{"key":"15_CR20","unstructured":"OpenAI: Pricing. https:\/\/openai.com\/pricing, accessed: 2024-04-04"},{"key":"15_CR21","unstructured":"OpenAI, Achiam, J., Adler, S., Agarwal, S., Ahmad, L., Akkaya, I., Aleman, F.L., Almeida, D., Altenschmidt, J., Altman, S., Anadkat, S., Avila, R., Babuschkin, I., Balaji, S., Balcom, V., Baltescu, P., Bao, H., Bavarian, M., Belgum, J., Bello, I., Berdine, J., Bernadett-Shapiro, G., Berner, C., Bogdonoff, L., Boiko, O., Boyd, M., Brakman, A.L., Brockman, G., Brooks, T., Brundage, M., Button, K., Cai, T., Campbell, R., Cann, A., Carey, B., Carlson, C., Carmichael, R., Chan, B., Chang, C., Chantzis, F., Chen, D., Chen, S., Chen, R., Chen, J., Chen, M., Chess, B., Cho, C., Chu, C., Chung, H.W., Cummings, D., Currier, J., Dai, Y., Decareaux, C., Degry, T., Deutsch, N., Deville, D., Dhar, A., Dohan, D., Dowling, S., Dunning, S., Ecoffet, A., Eleti, A., Eloundou, T., Farhi, D., Fedus, L., Felix, N., Fishman, S.P., Forte, J., Fulford, I., Gao, L., Georges, E., Gibson, C., Goel, V., Gogineni, T., Goh, G., Gontijo-Lopes, R., Gordon, J., Grafstein, M., Gray, S., Greene, R., Gross, J., Gu, S.S., Guo, Y., Hallacy, C., Han, J., Harris, J., He, Y., Heaton, M., Heidecke, J., Hesse, C., Hickey, A., Hickey, W., Hoeschele, P., Houghton, B., Hsu, K., Hu, S., Hu, X., Huizinga, J., Jain, S., Jain, S., Jang, J., Jiang, A., Jiang, R., Jin, H., Jin, D., Jomoto, S., Jonn, B., Jun, H., Kaftan, T., \u0141ukasz Kaiser, Kamali, A., Kanitscheider, I., Keskar, N.S., Khan, T., Kilpatrick, L., Kim, J.W., Kim, C., Kim, Y., Kirchner, J.H., Kiros, J., Knight, M., Kokotajlo, D., \u0141ukasz Kondraciuk, Kondrich, A., Konstantinidis, A., Kosic, K., Krueger, G., Kuo, V., Lampe, M., Lan, I., Lee, T., Leike, J., Leung, J., Levy, D., Li, C.M., Lim, R., Lin, M., Lin, S., Litwin, M., Lopez, T., Lowe, R., Lue, P., Makanju, A., Malfacini, K., Manning, S., Markov, T., Markovski, Y., Martin, B., Mayer, K., Mayne, A., McGrew, B., McKinney, S.M., McLeavey, C., McMillan, P., McNeil, J., Medina, D., Mehta, A., Menick, J., Metz, L., Mishchenko, A., Mishkin, P., Monaco, V., Morikawa, E., Mossing, D., Mu, T., Murati, M., Murk, O., M\/\u2019ely, D., Nair, A., Nakano, R., Nayak, R., Neelakantan, A., Ngo, R., Noh, H., Ouyang, L., O\u2019Keefe, C., Pachocki, J., Paino, A., Palermo, J., Pantuliano, A., Parascandolo, G., Parish, J., Parparita, E., Passos, A., Pavlov, M., Peng, A., Perelman, A., de\u00a0Avila Belbute\u00a0Peres, F., Petrov, M.: Gpt-4 technical report (2024)"},{"key":"15_CR22","unstructured":"Pinecone: Build knowledgeable ai. https:\/\/www.pinecone.io\/, accessed: 2024-04-08"},{"key":"15_CR23","unstructured":"Qdrant: qdrant. https:\/\/github.com\/qdrant\/qdrant, accessed: 2024-04-08"},{"key":"15_CR24","unstructured":"Qiu, H., Mao, W., Patke, A., Cui, S., Jha, S., Wang, C., Franke, H., Kalbarczyk, Z.T., Basar, T., Iyer, R.K.: Efficient interactive LLM serving with proxy model-based sequence length prediction. CoRR abs\/2404.08509 (2024)"},{"key":"15_CR25","unstructured":"Qiu, H., Mao, W., Patke, A., Cui, S., Jha, S., Wang, C., Franke, H., Kalbarczyk, Z.T., Basar, T., Iyer, R.K.: Power-aware deep learning model serving with $$\\mu $$-serve. In: USENIX ATC (2024)"},{"key":"15_CR26","doi-asserted-by":"crossref","unstructured":"Rasool, Z., Barnett, S., Willie, D., Kurniawan, S., Balugo, S., Thudumu, S., Abdelrazek, M.: Llms for test input generation for semantic applications. In: CAIN. pp. 160\u2013165 (2024)","DOI":"10.1145\/3644815.3644948"},{"key":"15_CR27","unstructured":"Redis: Get the world\u2019s fastest in-memory database from the ones who built it. https:\/\/redis.io\/, accessed: 2024-04-09"},{"key":"15_CR28","unstructured":"Touvron, H., Martin, L., Stone, K., Albert, P., Almahairi, A., Babaei, Y., Bashlykov, N., Batra, S., Bhargava, P., Bhosale, S., Bikel, D., Blecher, L., Canton-Ferrer, C., Chen, M., Cucurull, G., Esiobu, D., Fernandes, J., Fu, J., Fu, W., Fuller, B., Gao, C., Goswami, V., Goyal, N., Hartshorn, A., Hosseini, S., Hou, R., Inan, H., Kardas, M., Kerkez, V., Khabsa, M., Kloumann, I., Korenev, A., Koura, P.S., Lachaux, M., Lavril, T., Lee, J., Liskovich, D., Lu, Y., Mao, Y., Martinet, X., Mihaylov, T., Mishra, P., Molybog, I., Nie, Y., Poulton, A., Reizenstein, J., Rungta, R., Saladi, K., Schelten, A., Silva, R., Smith, E.M., Subramanian, R., Tan, X.E., Tang, B., Taylor, R., Williams, A., Kuan, J.X., Xu, P., Yan, Z., Zarov, I., Zhang, Y., Fan, A., Kambadur, M., Narang, S., Rodriguez, A., Stojnic, R., Edunov, S., Scialom, T.: Llama 2: Open foundation and fine-tuned chat models. CoRR abs\/2307.09288 (2023)"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Wang, J., Yi, X., Guo, R., Jin, H., Xu, P., Li, S., Wang, X., Guo, X., Li, C., Xu, X., Yu, K., Yuan, Y., Zou, Y., Long, J., Cai, Y., Li, Z., Zhang, Z., Mo, Y., Gu, J., Jiang, R., Wei, Y., Xie, C.: Milvus: A purpose-built vector data management system. In: SIGMOD. pp. 2614\u20132627 (2021)","DOI":"10.1145\/3448016.3457550"},{"key":"15_CR30","unstructured":"Weaviate: Welcome to weaviate docs. https:\/\/weaviate.io\/developers\/weaviate, accessed: 2024-04-08"},{"key":"15_CR31","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhang, D., Yang, G., Yang, B., Xu, S., Zheng, L., Liang, C.: Survey for landing generative ai in social and e-commerce recsys \u2013 the industry perspectives. CoRR abs\/2406.06475 (2024)","DOI":"10.1145\/3627673.3679099"},{"key":"15_CR32","unstructured":"Ye, J., Chen, X., Xu, N., Zu, C., Shao, Z., Liu, S., Cui, Y., Zhou, Z., Gong, C., Shen, Y., Zhou, J., Chen, S., Gui, T., Zhang, Q., Huang, X.: A comprehensive capability analysis of GPT-3 and GPT-3.5 series models. CoRR abs\/2303.10420 (2023)"},{"key":"15_CR33","unstructured":"Zhu, H., Zhu, B., Jiao, J.: Efficient prompt caching via embedding similarity. CoRR abs\/2402.01173 (2024)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78183-4_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T12:12:20Z","timestamp":1733227940000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78183-4_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,4]]},"ISBN":["9783031781827","9783031781834"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78183-4_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,4]]},"assertion":[{"value":"4 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}