{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T03:05:46Z","timestamp":1780455946430,"version":"3.54.1"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031998713","type":"print"},{"value":"9783031998720","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-99872-0_2","type":"book-chapter","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:08:49Z","timestamp":1755774529000},"page":"18-32","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Cache Management for\u00a0Mixture-of-Experts LLMs"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9819-9158","authenticated-orcid":false,"given":"Spyros","family":"Angelopoulos","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5519-9913","authenticated-orcid":false,"given":"Loris","family":"Marchal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6037-9787","authenticated-orcid":false,"given":"Adrien","family":"Obrecht","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2565-1163","authenticated-orcid":false,"given":"Bertrand","family":"Simon","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"2_CR1","doi-asserted-by":"crossref","unstructured":"Agrawal, K., Bender, M.A., Das, R., Kuszmaul, W., Peserico, E., Scquizzato, M.: Green paging and parallel paging. In: ACM Symposium on Parallelism in Algorithms and Architectures (SPAA), pp. 493\u2013495 (2020)","DOI":"10.1145\/3350755.3400231"},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Angelopoulos, S., Marchal, L., Obrecht, A., Simon, B.: Cache management for mixture-of-experts LLMs \u2013 extended version (2025), https:\/\/hal.science\/hal-04961621","DOI":"10.1007\/978-3-031-99872-0_2"},{"issue":"2","key":"2_CR3","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1147\/sj.52.0078","volume":"5","author":"LA Belady","year":"1966","unstructured":"Belady, L.A.: A study of replacement algorithms for virtual-storage computer. IBM Syst. J. 5(2), 78\u2013101 (1966)","journal-title":"IBM Syst. J."},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Blom, G., Holst, L., Sandell, D.: Problems and Snapshots from the World of Probability. Springer Science & Business Media (1993)","DOI":"10.1007\/978-1-4612-4304-5"},{"key":"2_CR5","volume-title":"Online Computation and Competitive Analysis","author":"A Borodin","year":"1998","unstructured":"Borodin, A., El-Yaniv, R.: Online Computation and Competitive Analysis. Cambridge University Press, New York, NY, USA (1998)"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Canon, L.C., Dugois, A., Marchal, L., Rivi\u00e8re, E.: Hector: a framework to design and evaluate scheduling strategies in persistent key-value stores. In: International Conference on Parallel Processing, ICPP 2023, pp. 535\u2013545 (2023)","DOI":"10.1145\/3605573.3605614"},{"key":"2_CR7","unstructured":"Du, N., et\u00a0al.: Glam: efficient scaling of language models with mixture-of-experts. In: International Conference on Machine Learning, pp. 5547\u20135569 (2022)"},{"key":"2_CR8","unstructured":"Eliseev, A., Mazur, D.: Fast inference of mixture-of-experts language models with offloading (2023), https:\/\/arxiv.org\/abs\/2312.17238"},{"key":"2_CR9","unstructured":"Face, H.: Helpful instructions dataset, https:\/\/huggingface.co\/datasets\/HuggingFaceH4\/helpful-instructions"},{"key":"2_CR10","unstructured":"Ferrante, M., Tagliavini, A.: On the coupon-collector\u2019s problem with several parallel collections (2016), https:\/\/arxiv.org\/abs\/1609.04174"},{"issue":"4","key":"2_CR11","doi-asserted-by":"publisher","first-page":"685","DOI":"10.1016\/0196-6774(91)90041-V","volume":"12","author":"A Fiat","year":"1991","unstructured":"Fiat, A., Karp, R.M., Luby, M., McGeoch, L.A., Sleator, D.D., Young, N.E.: Competitive paging algorithms. J. Algorithms 12(4), 685\u2013699 (1991)","journal-title":"J. Algorithms"},{"issue":"241","key":"2_CR12","first-page":"1","volume":"22","author":"T Hoefler","year":"2021","unstructured":"Hoefler, T., Alistarh, D., Ben-Nun, T., Dryden, N., Peste, A.: Sparsity in deep learning: pruning and growth for efficient inference and training in neural networks. J. Mach. Learn. Res. 22(241), 1\u2013124 (2021)","journal-title":"J. Mach. Learn. Res."},{"key":"2_CR13","unstructured":"Jiang, A.Q., et al.: Mixtral of experts (2024), https:\/\/doi.org\/10.48550\/arXiv.2401.04088"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Kotera, I., Egawa, R., Takizawa, H., Kobayashi, H.: Modeling of cache access behavior based on Zipf\u2019s law. In: 9th Workshop on MEmory Performance: DEaling with Applications, Systems and Architecture, MEDEA 2008, pp. 9\u201315 (2008)","DOI":"10.1145\/1509084.1509086"},{"key":"2_CR15","unstructured":"Lykouris, T., Vassilvitskii, S.: Competitive caching with machine learned advice. In: Dy, J.G., Krause, A. (eds.) International Conference on Machine Learning, (ICML), vol.\u00a080, pp. 3302\u20133311 (2018)"},{"key":"2_CR16","unstructured":"Minaee, S., et al.: Large language models: a survey (2024), https:\/\/arxiv.org\/abs\/2402.06196"},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Motwani, R., Raghavan, P.: Randomized Algorithms. Cambridge University Press (1995)","DOI":"10.1017\/CBO9780511814075"},{"key":"2_CR18","unstructured":"Nagel, M., Fournarakis, M., Amjad, R.A., Bondarenko, Y., van Baalen, M., Blankevoort, T.: A white paper on neural network quantization (2021), https:\/\/arxiv.org\/abs\/2106.08295"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Simon, B., Marchal, L., Angelopoulos, S., Obrecht, A.: Artifact of the paper: cache management for mixture-of-experts LLMs, June 2025, https:\/\/doi.org\/10.5281\/zenodo.15576758","DOI":"10.1007\/978-3-031-99872-0_2"},{"issue":"2","key":"2_CR20","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1145\/2786.2793","volume":"28","author":"DD Sleator","year":"1985","unstructured":"Sleator, D.D., Tarjan, R.E.: Amortized efficiency of list update and paging rules. Commun. ACM 28(2), 202\u2013208 (1985)","journal-title":"Commun. ACM"},{"key":"2_CR21","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Neural Information Processing Systems conference (NeurIPS), pp. 5998\u20136008 (2017)"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Zhu, T., et al.: Llama-moe: building mixture-of-experts from llama with continual pre-training (2024), https:\/\/arxiv.org\/abs\/2406.16554","DOI":"10.18653\/v1\/2024.emnlp-main.890"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2025: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-99872-0_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T13:24:33Z","timestamp":1780406673000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-99872-0_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,22]]},"ISBN":["9783031998713","9783031998720"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-99872-0_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,22]]},"assertion":[{"value":"22 August 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dresden","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2025.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}