{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T01:06:34Z","timestamp":1777338394037,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":112,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N000142412612"],"award-info":[{"award-number":["N000142412612"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2239020,1565570,2402873"],"award-info":[{"award-number":["2239020,1565570,2402873"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3669940.3707264","type":"proceedings-article","created":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T12:28:01Z","timestamp":1738844881000},"page":"15-32","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":26,"title":["Accelerating Retrieval-Augmented Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5862-6565","authenticated-orcid":false,"given":"Derrick","family":"Quinn","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9172-1363","authenticated-orcid":false,"given":"Mohammad","family":"Nouri","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8055-4243","authenticated-orcid":false,"given":"Neel","family":"Patel","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0942-1432","authenticated-orcid":false,"given":"John","family":"Salihu","sequence":"additional","affiliation":[{"name":"University of Kansas, Lawrence, KS, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1937-2615","authenticated-orcid":false,"given":"Alireza","family":"Salemi","sequence":"additional","affiliation":[{"name":"UMass Amherst, Amherst, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4811-4843","authenticated-orcid":false,"given":"Sukhan","family":"Lee","sequence":"additional","affiliation":[{"name":"Samsung Electronics, Hwasung, Gyeonggi-do, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0800-3340","authenticated-orcid":false,"given":"Hamed","family":"Zamani","sequence":"additional","affiliation":[{"name":"UMass Amherst, Amherst, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4622-2181","authenticated-orcid":false,"given":"Mohammad","family":"Alian","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2308.16369"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3228964"},{"key":"e_1_3_2_1_3_1","unstructured":"Meta AI. 2024. Llama 3. Online; accessed 2024--12--13. https:\/\/llama.meta.com\/llama3\/"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00070"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2312.11514"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1807.05614"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248038"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.32473\/flairs.v34i1.128369"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679903"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19--1124"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.375"},{"key":"e_1_3_2_1_12_1","volume-title":"Cohen","author":"Chen Wenhu","year":"2022","unstructured":"Wenhu Chen, Hexiang Hu, Chitwan Saharia, and William W. Cohen. 2022. Re-Imagen: Retrieval-Augmented Text-to-Image Generator. ArXiv abs\/2209.14491 (2022). https:\/\/api.semanticscholar.org\/CorpusID:252596087"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3361682"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of The 34th International Conference on Algorithmic Learning Theory (Proceedings of Machine Learning Research","volume":"619","author":"Keles Feyza Duman","year":"2023","unstructured":"Feyza Duman Keles, Pruthuvi Mahesakya Wijewardena, and Chinmay Hegde. 2023. On The Computational Complexity of Self-Attention. In Proceedings of The 34th International Conference on Algorithmic Learning Theory (Proceedings of Machine Learning Research, Vol. 201), Shipra Agrawal and Francesco Orabona (Eds.). PMLR, 597--619. https:\/\/proceedings.mlr.press\/v201\/duman-keles23a.html"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16220"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589348"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2311.04934"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12013"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.70"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/3327546.3327670"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.246"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591687"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00047"},{"key":"e_1_3_2_1_25_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=NTEz-6wysdb","author":"Izacard Gautier","year":"2021","unstructured":"Gautier Izacard and Edouard Grave. 2021. Distilling Knowledge from Reader to Retriever for Question Answering. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=NTEz-6wysdb"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.74"},{"key":"e_1_3_2_1_27_1","volume-title":"Faiss: A Library for Efficient Similarity Search. Engineering at Meta. https:\/\/engineering.fb.com\/2017\/03\/29\/data-infrastructure\/faiss-alibrary-for-efficient-similarity-search\/ Accessed: 2024--12--13.","author":"J\u00e9gou Herv\u00e9","year":"2017","unstructured":"Herv\u00e9 J\u00e9gou, Matthijs Douze, and Jeff Johnson. 2017. Faiss: A Library for Efficient Similarity Search. Engineering at Meta. https:\/\/engineering.fb.com\/2017\/03\/29\/data-infrastructure\/faiss-alibrary-for-efficient-similarity-search\/ Accessed: 2024--12--13."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.57"},{"key":"e_1_3_2_1_29_1","volume-title":"International Conference on Machine Learning. PMLR, 10697--10707","author":"Kandpal Nikhil","year":"2022","unstructured":"Nikhil Kandpal, Eric Wallace, and Colin Raffel. 2022. Deduplicating training data mitigates privacy risks in language models. In International Conference on Machine Learning. PMLR, 10697--10707. https:\/\/proceedings.mlr.press\/v162\/kandpal22a.html"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Kasai Jungo","year":"2024","unstructured":"Jungo Kasai, Keisuke Sakaguchi, Yoichi Takahashi, Ronan Le Bras, Akari Asai, Xinyan Velocity Yu, Dragomir Radev, Noah A. Smith, Yejin Choi, and Kentaro Inui. 2024. REALTIME QA: what's the answer right now?. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS '23). Curran Associates Inc., Red Hook, NY, USA, Article 2130, 19 pages. https:\/\/dl.acm.org\/doi\/10.5555\/3666122.3668252"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.535"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3097700"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/VLSITechnologyandCir46769.2022.9830277"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3164651"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2407.12982"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2407.11016"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1906.00300"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00021"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3496517"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3380600"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3578835"},{"key":"e_1_3_2_1_44_1","volume-title":"A Survey on Retrieval-Augmented Text Generation. ArXiv abs\/2202.01110","author":"Li Huayang","year":"2022","unstructured":"Huayang Li, Yixuan Su, Deng Cai, Yan Wang, and Lemao Liu. 2022. A Survey on Retrieval-Augmented Text Generation. ArXiv abs\/2202.01110 (2022). https:\/\/api.semanticscholar.org\/CorpusID:246472929"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2019.2909204"},{"key":"e_1_3_2_1_46_1","volume-title":"LLM Inference Series: 4. KV caching, a deeper look. Pierre Leinhart (Medium) (Jan","author":"Lienhart Pierre","year":"2024","unstructured":"Pierre Lienhart. 2024. LLM Inference Series: 4. KV caching, a deeper look. Pierre Leinhart (Medium) (Jan 2024). https:\/\/medium.com\/@plienhar\/llm-inference-series-4-kv-cachinga-deeper-look-4ba9a77746c8"},{"key":"e_1_3_2_1_47_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74--81. https:\/\/aclanthology.org\/W04--1013"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2306.00978"},{"key":"e_1_3_2_1_49_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=zv-typ1gPxA","author":"Liu Shangqing","year":"2021","unstructured":"Shangqing Liu, Yu Chen, Xiaofei Xie, Jing Kai Siow, and Yang Liu. 2021. Retrieval-Augmented Generation for Code Summarization via Hybrid GNN. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=zv-typ1gPxA"},{"key":"e_1_3_2_1_50_1","volume-title":"Die Analysis: Samsung Exynos 2200 withRDNA2Graphics. Online","year":"2022","unstructured":"Locuza. 2022. Die Analysis: Samsung Exynos 2200 withRDNA2Graphics. Online; accessed 2024--12--13. https:\/\/locuza.substack.com\/p\/dieanalysis-samsung-exynos-2200"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818950.2818951"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889473"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651335"},{"key":"e_1_3_2_1_54_1","volume-title":"He Who Can Pay Top Dollar For HBM Memory Controls AI Training. The Next Platform","author":"Morgan Timothy Prickett","year":"2024","unstructured":"Timothy Prickett Morgan. 2024. He Who Can Pay Top Dollar For HBM Memory Controls AI Training. The Next Platform (2024). https:\/\/www.nextplatform.com\/2024\/02\/27\/he-who-can-pay-topdollar-for-hbm-memory-controls-ai-training\/ Accessed: 2024-06--23."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC19947.2020.9063103"},{"key":"e_1_3_2_1_56_1","volume-title":"ChatGPT plugins. OpenAI Blog","author":"AI.","year":"2023","unstructured":"OpenAI. 2023. ChatGPT plugins. OpenAI Blog (2023). https:\/\/openai.com\/blog\/chatgpt-plugins"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00078"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.232"},{"key":"e_1_3_2_1_59_1","volume-title":"Apple M2 Die Shot and Architecture Analysis -- Big Cost Increase And A15 Based IP. SemiAnalysis (June","author":"Patel Dylan","year":"2022","unstructured":"Dylan Patel. 2022. Apple M2 Die Shot and Architecture Analysis -- Big Cost Increase And A15 Based IP. SemiAnalysis (June 2022). https:\/\/www.semianalysis.com\/p\/apple-m2-die-shot-and-architecture"},{"key":"e_1_3_2_1_60_1","volume-title":"CXL Is Dead In The AI Era. Online","author":"Patel Dylan","year":"2024","unstructured":"Dylan Patel and Jeremie Eliahou Ontiveros. 2024. CXL Is Dead In The AI Era. Online; accessed 2024--12--13. https:\/\/www.semianalysis.com\/p\/cxl-is-dead-in-the-ai-era"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00032"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3623776"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00019"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19--1263"},{"key":"e_1_3_2_1_65_1","volume-title":"a new unified benchmark for knowledge-intensive NLP tasks. Online","author":"Petroni Fabio","year":"2024","unstructured":"Fabio Petroni, Aleksandra Piktus, and Angela Fan. 2020. Introducing KILT, a new unified benchmark for knowledge-intensive NLP tasks. Online; accessed 2024--11--22. https:\/\/ai.meta.com\/blog\/introducingkilt-a-new-unified-benchmark-for-knowledge-intensive-nlptasks\/"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.200"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.466"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16--1264"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.eacl-main.266"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591629"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657783"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","unstructured":"Alireza Salemi Sheshera Mysore Michael Bendersky and Hamed Zamani. 2024. LaMP: When Large Language Models Meet Personalization. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) Lun-Wei Ku Andre Martins and Vivek Srikumar (Eds.). Association for Computational Linguistics Bangkok Thailand 7370--7392. https:\/\/doi.org\/10.18653\/v1\/2024.acl-long.399","DOI":"10.18653\/v1\/2024.acl-long.399"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605137"},{"key":"e_1_3_2_1_75_1","unstructured":"Alireza Salemi and Hamed Zamani. 2024. Comparing Retrieval-Augmentation and Parameter-Efficient Fine-Tuning for Privacy-Preserving Personalization of Large Language Models. arXiv:2409.09510 [cs.CL] https:\/\/arxiv.org\/abs\/2409.09510"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657957"},{"key":"e_1_3_2_1_77_1","unstructured":"Alireza Salemi andHamed Zamani. 2024. Learning toRankfor Multiple Retrieval-Augmented Models through Iterative Utility Maximization. arXiv:2410.09942 [cs.CL] https:\/\/arxiv.org\/abs\/2410.09942"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657733"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS51556.2021.9401196"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3549555.3549585"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624868"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2311.00502"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.320"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2022.3226482"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00530"},{"key":"e_1_3_2_1_86_1","volume-title":"Retrieval Augumented Generation Overview. Microsoft Learn","author":"Steen Heidi","year":"2023","unstructured":"Heidi Steen and DanWahlin. 2023. Retrieval Augumented Generation Overview. Microsoft Learn (2023). https:\/\/learn.microsoft.com\/enus\/azure\/search\/retrieval-augmented-generation-overview"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2403.20306"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/IEDM.2017.8268306"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.50"},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2312.11805"},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2102.04643"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19--1371"},{"key":"e_1_3_2_1_93_1","volume-title":"Advances in Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H.Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476255"},{"key":"e_1_3_2_1_95_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2312.03141"},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2005.11401"},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18--5713"},{"key":"e_1_3_2_1_98_1","unstructured":"WikiChip. 2024. Mask \/ Reticle. Online; accessed 2024--12--13. https:\/\/en.wikichip.org\/wiki\/mask"},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33017281"},{"key":"e_1_3_2_1_100_1","doi-asserted-by":"publisher","DOI":"10.5555\/3618408.3619993"},{"key":"e_1_3_2_1_101_1","volume-title":"Efficient Streaming Language Models with Attention Sinks. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=NG7sS51zVF","author":"Xiao Guangxuan","year":"2024","unstructured":"Guangxuan Xiao, Yuandong Tian, Beidi Chen, Song Han, and Mike Lewis. 2024. Efficient Streaming Language Models with Attention Sinks. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=NG7sS51zVF"},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.144"},{"key":"e_1_3_2_1_103_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2401.15884"},{"key":"e_1_3_2_1_104_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2304.04487"},{"key":"e_1_3_2_1_105_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071127"},{"key":"e_1_3_2_1_106_1","volume-title":"Heather Miller, Chris Potts, James Zou, Michael Carbin, Jonathan Frankle, Naveen Rao, and Ali Ghodsi.","author":"Zaharia Matei","year":"2024","unstructured":"Matei Zaharia, Omar Khattab, Lingjiao Chen, Jared Quincy Davis, Heather Miller, Chris Potts, James Zou, Michael Carbin, Jonathan Frankle, Naveen Rao, and Ali Ghodsi. 2024. The Shift from Models to Compound AI Systems. Online; accessed 2024--12--13. https:\/\/bair.berkeley.edu\/blog\/2024\/02\/18\/compound-ai-systems\/"},{"key":"e_1_3_2_1_107_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657923"},{"key":"e_1_3_2_1_108_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531722"},{"key":"e_1_3_2_1_109_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18--1120"},{"key":"e_1_3_2_1_110_1","unstructured":"Yunan Zhang Shige Liu and JianguoWang. [n. d.]. Are There Fundamental Limitations in Supporting Vector Data Management in Relational Databases?ACase Study of PostgreSQL. Preprint ([n. d.]). https:\/\/www.cs.purdue.edu\/homes\/csjgwang\/pubs\/ICDE24_VecDB.pdf Accepted for publication in Proceedings of the International Conference on Data Engineering (ICDE)."},{"key":"e_1_3_2_1_111_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071005"},{"key":"e_1_3_2_1_112_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2405.16178"}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Rotterdam Netherlands","acronym":"ASPLOS '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707264","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3669940.3707264","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T14:46:07Z","timestamp":1755787567000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707264"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":112,"alternative-id":["10.1145\/3669940.3707264","10.1145\/3669940"],"URL":"https:\/\/doi.org\/10.1145\/3669940.3707264","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}