{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T03:50:59Z","timestamp":1767844259839,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":88,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T00:00:00Z","timestamp":1750377600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2239020, CCF-2217071, CCF-2312739, CCF-2312740, CCF-2312741, and CCF-2407690"],"award-info":[{"award-number":["CCF-2239020, CCF-2217071, CCF-2312739, CCF-2312740, CCF-2312741, and CCF-2407690"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000028","name":"Semiconductor Research Corporation","doi-asserted-by":"publisher","award":["JUMP 2.0 research centers ACE and PRISM"],"award-info":[{"award-number":["JUMP 2.0 research centers ACE and PRISM"]}],"id":[{"id":"10.13039\/100000028","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,21]]},"DOI":"10.1145\/3695053.3731079","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T16:43:11Z","timestamp":1750437791000},"page":"1108-1124","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["DReX: Accurate and Scalable Dense Retrieval Acceleration via Algorithmic-Hardware Codesign"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5862-6565","authenticated-orcid":false,"given":"Derrick","family":"Quinn","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0460-8230","authenticated-orcid":false,"given":"E. Ezgi","family":"Y\u00fccel","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4348-236X","authenticated-orcid":false,"given":"Martin","family":"Prammer","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3346-2136","authenticated-orcid":false,"given":"Zhenxing","family":"Fan","sequence":"additional","affiliation":[{"name":"University of Virginia, Charlottesville, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8091-9302","authenticated-orcid":false,"given":"Kevin","family":"Skadron","sequence":"additional","affiliation":[{"name":"University of Virginia, Charlottesville, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3653-2538","authenticated-orcid":false,"given":"Jignesh M.","family":"Patel","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5451-5681","authenticated-orcid":false,"given":"Jos\u00e9 F.","family":"Mart\u00ednez","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4622-2181","authenticated-orcid":false,"given":"Mohammad","family":"Alian","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n. d.]. LPDDR5 Tutorial: Deep dive into its physical structure. https:\/\/www.systemverilog.io\/design\/lpddr5-tutorial-physical-structure\/. Accessed Feb. 21 2025."},{"key":"e_1_3_3_2_3_2","unstructured":"2024. CXL Is Dead In The AI Era. Semianalysis (2024). https:\/\/www.semianalysis.com\/p\/cxl-is-dead-in-the-ai-era"},{"key":"e_1_3_3_2_4_2","unstructured":"Megha Agarwal Asfandyar Qureshi Nikhil Sardana Linden Li Julian Quevedo and Daya Khudia. 2023. LLM Inference Performance Engineering: Best Practices. https:\/\/www.databricks.com\/blog\/llm-inference-performance-engineering-best-practices. Online; accessed 2025-02-22."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","unstructured":"Yeonchan Ahn Sang-Goo Lee Junho Shim and Jaehui Park. 2022. Retrieval-Augmented Response Generation for Knowledge-Grounded Conversation in the Wild. IEEE Access 10 (2022) 131374\u2013131385. 10.1109\/ACCESS.2022.3228964","DOI":"10.1109\/ACCESS.2022.3228964"},{"key":"e_1_3_3_2_6_2","unstructured":"Nomic AI. [n. d.]. nomic-embed-text-v1.5. https:\/\/huggingface.co\/nomic-ai\/nomic-embed-text-v1.5. Accessed: 2025-02-12."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.5555\/2969239.2969376"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/2020408.2020576"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.5555\/313559.313768"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-68474-1_3"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248038"},{"key":"e_1_3_3_2_12_2","unstructured":"Payal Bajaj Daniel Campos Nick Craswell Li Deng Jianfeng Gao Xiaodong Liu Rangan Majumder Andrew McNamara Bhaskar Mitra Tri Nguyen Mir Rosenberg Xia Song Alina Stoica Saurabh Tiwary and Tong Wang. 2018. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. arxiv:https:\/\/arXiv.org\/abs\/1611.09268\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1611.09268"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","unstructured":"Giovanni Bonetta Rossella Cancelliere Ding Liu and Paul Vozila. 2021. Retrieval-Augmented Transformer-XL for Close-Domain Dialog Generation. The International FLAIRS Conference Proceedings 34 (Apr. 2021). 10.32473\/flairs.v34i1.128369","DOI":"10.32473\/flairs.v34i1.128369"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1124"},{"key":"e_1_3_3_2_15_2","unstructured":"Qi Chen Bing Zhao Haidong Wang Mingqin Li Chuanjie Liu Zengzhong Li Mao Yang and Jingdong Wang. 2021. SPANN: Highly-efficient Billion-scale Approximate Nearest Neighbor Search. CoRR abs\/2111.08566 (2021). arXiv:https:\/\/arXiv.org\/abs\/2111.08566https:\/\/arxiv.org\/abs\/2111.08566"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.375"},{"key":"e_1_3_3_2_17_2","unstructured":"Google Cloud. 2023. Best practices with large language models (LLMs). https:\/\/cloud.google.com\/vertex-ai\/generative-ai\/docs\/learn\/prompt-best-practices. Online; accessed 2025-02-22."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","unstructured":"William\u00a0J. Dally Yatish Turakhia and Song Han. 2020. Domain-specific hardware accelerators. Commun. ACM 63 7 (2020) 48\u201357. 10.1145\/3361682","DOI":"10.1145\/3361682"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/ReCoSoC48741.2019.9034938"},{"key":"e_1_3_3_2_20_2","unstructured":"Sanjoy Dasgupta and Kaushik Sinha. 2013. Randomized partition trees for exact nearest neighbor search. CoRR abs\/1302.1948 (2013). arXiv:https:\/\/arXiv.org\/abs\/1302.1948http:\/\/arxiv.org\/abs\/1302.1948"},{"key":"e_1_3_3_2_21_2","volume-title":"Proceedings of the 31st Hot Chips Symposium (HC31)","author":"Devaux Fabrice","year":"2019","unstructured":"Fabrice Devaux. 2019. UPMEM Processing in Memory: DRAM is Becoming a True Processing Unit. In Proceedings of the 31st Hot Chips Symposium (HC31). Stanford, CA, USA. https:\/\/old.hotchips.org\/hc31\/HC31_1.4_UPMEM.FabriceDevaux.v2_1.pdf Accessed: November 23, 2024."},{"key":"e_1_3_3_2_22_2","unstructured":"Angela\u00a0Fan Fabio\u00a0Petroni Aleksandra\u00a0Piktus. [n. d.]. Introducing KILT a new unified benchmark for knowledge-intensive NLP tasks \u2014 ai.meta.com. Meta AI Blog ([n. d.]). https:\/\/ai.meta.com\/blog\/introducing-kilt-a-new-unified-benchmark-for-knowledge-intensive-nlp-tasks\/ [Accessed 22-11-2023]."},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589348"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","unstructured":"Cong Fu Chao Xiang Changxu Wang and Deng Cai. 2019. Fast approximate nearest neighbor search with the navigating spreading-out graph. Proc. VLDB Endow. 12 5 (Jan. 2019) 461\u2013474. 10.14778\/3303753.3303754","DOI":"10.14778\/3303753.3303754"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","unstructured":"Daniel Gillick Sayali Kulkarni Larry Lansing Alessandro Presta Jason Baldridge Eugene Ie and Diego Garcia-Olano. 2019. Learning Dense Representations for Entity Retrieval. arXiv (2019). 10.48550\/arXiv.1909.10506 arxiv:https:\/\/arXiv.org\/abs\/1909.10506","DOI":"10.48550\/arXiv.1909.10506"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995432"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.70"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00058"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/276698.276876"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.74"},{"key":"e_1_3_3_2_32_2","unstructured":"Herv\u00e9 Jegou Matthijs Douze and Jeff Johnson. [n. d.]. Faiss: A library for efficient similarity search \u2014 engineering.fb.com. Meta Engineering Blog ([n. d.]). https:\/\/engineering.fb.com\/2017\/03\/29\/data-infrastructure\/faiss-a-library-for-efficient-similarity-search\/ [Accessed 12-11-2023]."},{"key":"e_1_3_3_2_33_2","unstructured":"Wenqi Jiang Shuai Zhang Boran Han Jie Wang Bernie Wang and Tim Kraska. 2024. PipeRAG: Fast Retrieval-Augmented Generation via Algorithm-System Co-design. (2024). arxiv:https:\/\/arXiv.org\/abs\/2403.05676\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2403.05676"},{"key":"e_1_3_3_2_34_2","unstructured":"Jeff Johnson and Matthijs Douze. 2023. Faiss on the GPU. https:\/\/github.com\/facebookresearch\/faiss\/wiki\/Faiss-on-the-GPU"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","unstructured":"Herve J\u00e9gou Matthijs Douze and Cordelia Schmid. 2011. Product Quantization for Nearest Neighbor Search. IEEE Transactions on Pattern Analysis and Machine Intelligence 33 1 (2011) 117\u2013128. 10.1109\/TPAMI.2010.57","DOI":"10.1109\/TPAMI.2010.57"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.298"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","unstructured":"Liu Ke Xuan Zhang Jinin So Jong-Geon Lee Shin-Haeng Kang Sukhan Lee Songyi Han YeonGon Cho Jin\u00a0Hyun Kim Yongsuk Kwon KyungSoo Kim Jin Jung Ilkwon Yun Sung\u00a0Joo Park Hyunsun Park Joonho Song Jeonghyeon Cho Kyomin Sohn Nam\u00a0Sung Kim and Hsien-Hsin\u00a0S. Lee. 2022. Near-Memory Processing in Action: Accelerating Personalized Recommendation With AxDIMM. IEEE Micro 42 1 (Jan. 2022) 116\u2013127. 10.1109\/MM.2021.3097700Conference Name: IEEE Micro.","DOI":"10.1109\/MM.2021.3097700"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","unstructured":"Ji-Hoon Kim Yeo-Reum Park Jaeyoung Do Soo-Young Ji and Joo-Young Kim. 2023. Accelerating Large-Scale Graph-Based Nearest Neighbor Search on a Computational Storage Platform. IEEE Trans. Comput. 72 1 (2023) 278\u2013290. 10.1109\/TC.2022.3155956","DOI":"10.1109\/TC.2022.3155956"},{"key":"e_1_3_3_2_40_2","series-title":"WSC5 \u201900","first-page":"4","volume-title":"Proceedings of the 5th Online World Conference on Soft Computing in Industrial Applications","volume":"1","author":"K\u00f6ppen Mario","year":"2000","unstructured":"Mario K\u00f6ppen. 2000. The Curse of Dimensionality. In Proceedings of the 5th Online World Conference on Soft Computing in Industrial Applications(WSC5 \u201900, Vol.\u00a01). Online World Conference on Soft Computing, 4\u20138."},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00013"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00021"},{"key":"e_1_3_3_2_43_2","series-title":"(NIPS \u201920)","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020. Retrieval-augmented generation for knowledge-intensive NLP tasks. In Proceedings of the 34th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS \u201920). Curran Associates Inc., Red Hook, NY, USA, Article 793, 16\u00a0pages."},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","unstructured":"Shang Li Zhiyuan Yang Dhiraj Reddy Ankur Srivastava and Bruce Jacob. 2020. DRAMsim3: A Cycle-Accurate Thermal-Capable DRAM Simulator. IEEE Computer Architecture Letters 19 2 (2020) 106\u2013109. 10.1109\/LCA.2020.2973991","DOI":"10.1109\/LCA.2020.2973991"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640360"},{"key":"e_1_3_3_2_46_2","unstructured":"Locuza. 2022. Die Analysis: Samsung Exynos 2200 with RDNA2 Graphics. https:\/\/locuza.substack.com\/p\/die-analysis-samsung-exynos-2200. Accessed: 2024-11-22."},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/2818950.2818951"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","unstructured":"Haocong Luo Yahya\u00a0Can Tu\u011frul F.\u00a0Nisa Bostanc\u0131 Ataberk Olgun A.\u00a0Giray Ya\u011fl\u0131k\u00e7\u0131 and Onur Mutlu. 2024. Ramulator 2.0: A Modern Modular and Extensible DRAM Simulator. IEEE Comput. Archit. Lett. 23 1 (Jan. 2024) 112\u2013116. 10.1109\/LCA.2023.3333759","DOI":"10.1109\/LCA.2023.3333759"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","unstructured":"Yury Malkov Alexander Ponomarenko Andrey Logvinov and Vladimir Krylov. 2014. Approximate nearest neighbor algorithm based on navigable small world graphs. Information Systems 45 (Sept. 2014) 61\u201368. 10.1016\/j.is.2013.10.006","DOI":"10.1016\/j.is.2013.10.006"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","unstructured":"Yu\u00a0A. Malkov and D.\u00a0A. Yashunin. 2020. Efficient and Robust Approximate Nearest Neighbor Search Using Hierarchical Navigable Small World Graphs. IEEE Transactions on Pattern Analysis and Machine Intelligence 42 4 (2020) 824\u2013836. 10.1109\/TPAMI.2018.2889473","DOI":"10.1109\/TPAMI.2018.2889473"},{"key":"e_1_3_3_2_51_2","unstructured":"Luke Merrick. 2024. Embedding And Clustering Your Data Can Improve Contrastive Pretraining. arxiv (2024). arxiv:https:\/\/arXiv.org\/abs\/2407.18887\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2407.18887"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"crossref","unstructured":"Niklas Muennighoff Nouamane Tazi Lo\u00efc Magne and Nils Reimers. 2022. MTEB: Massive text embedding benchmark. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.07316 (2022).","DOI":"10.18653\/v1\/2023.eacl-main.148"},{"key":"e_1_3_3_2_53_2","unstructured":"Hiroyuki Ootomo Akira Naruse Corey Nolet Ray Wang Tamas Feher and Yong Wang. 2024. CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search for GPUs. arxiv:https:\/\/arXiv.org\/abs\/2308.15136\u00a0[cs.DS] https:\/\/arxiv.org\/abs\/2308.15136"},{"key":"e_1_3_3_2_54_2","unstructured":"OpenAI. 2023. ChatGPT plugins. OpenAI Blog (2023). https:\/\/openai.com\/blog\/chatgpt-plugins"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS61541.2024.00015"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","unstructured":"James\u00a0Jie Pan Jianguo Wang and Guoliang Li. 2024. Survey of vector database management systems. The VLDB Journal 33 5 (jul 2024) 1591\u20131615. 10.1007\/s00778-024-00864-x","DOI":"10.1007\/s00778-024-00864-x"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640422"},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00078"},{"key":"e_1_3_3_2_59_2","unstructured":"Dylan Patel. 2022. Apple M2 Die Shot and Architecture Analysis \u2013 Big Cost Increase And A15 Based IP. SemiAnalysis (June 2022). https:\/\/www.semianalysis.com\/p\/apple-m2-die-shot-and-architecture"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643528"},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707264"},{"key":"e_1_3_3_2_63_2","unstructured":"TREC RAG. 2024. TREC RAG 2024 Corpus Finalization. https:\/\/trec-rag.github.io\/annoucements\/2024-corpus-finalization\/. Accessed: 2024-11-23."},{"key":"e_1_3_3_2_64_2","first-page":"29","volume-title":"Proceedings of the first instructional conference on machine learning","volume":"242","author":"Ramos Juan","year":"2003","unstructured":"Juan Ramos et\u00a0al. 2003. Using TF-IDF to determine word relevance in document queries. In Proceedings of the first instructional conference on machine learning, Vol.\u00a0242. Citeseer, 29\u201348."},{"key":"e_1_3_3_2_65_2","unstructured":"RAPIDS AI. 2025. cuVS: GPU-Accelerated Vector Search and Clustering. https:\/\/github.com\/rapidsai\/cuvs. Accessed: 2025-05-09."},{"key":"e_1_3_3_2_66_2","volume-title":"Text Retrieval Conference","author":"Robertson Stephen\u00a0E.","year":"1994","unstructured":"Stephen\u00a0E. Robertson, Steve Walker, Susan Jones, Micheline Hancock-Beaulieu, and Mike Gatford. 1994. Okapi at TREC-3. In Text Retrieval Conference. https:\/\/api.semanticscholar.org\/CorpusID:3946054"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591629"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605137"},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"publisher","unstructured":"Gerard Salton and Christopher Buckley. 1988. Term-Weighting Approaches in Automatic Text Retrieval. Inf. Process. Manage. 24 5 (Aug. 1988) 513\u2013523. 10.1016\/0306-4573(88)90021-0","DOI":"10.1016\/0306-4573(88)90021-0"},{"key":"e_1_3_3_2_70_2","volume-title":"The Twenty-Seventh International Flairs Conference","author":"Schuh Michael\u00a0A","year":"2014","unstructured":"Michael\u00a0A Schuh, Tim Wylie, and Rafal\u00a0A Angryk. 2014. Mitigating the curse of dimensionality for exact kNN retrieval. In The Twenty-Seventh International Flairs Conference."},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"crossref","unstructured":"Karen Sparck\u00a0Jones. 1972. A statistical interpretation of term specificity and its application in retrieval. Journal of documentation 28 1 (1972) 11\u201321.","DOI":"10.1108\/eb026526"},{"key":"e_1_3_3_2_72_2","unstructured":"Heidi Steen and Dan Wahlin. 2023. Retrieval Augumented Generation Overview. Microsoft Learn (2023). https:\/\/learn.microsoft.com\/en-us\/azure\/search\/retrieval-augmented-generation-overview"},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","unstructured":"Aaron Stillmaker and Bevan Baas. 2017. Scaling equations for the accurate prediction of CMOS device performance from 180nm to 7nm. Integration 58 (2017) 74\u201381. 10.1016\/j.vlsi.2017.02.002","DOI":"10.1016\/j.vlsi.2017.02.002"},{"key":"e_1_3_3_2_74_2","doi-asserted-by":"publisher","unstructured":"Jovan Stojkovic Esha Choukse Chaojie Zhang Inigo Goiri and Josep Torrellas. 2024. Towards Greener LLMs: Bringing Energy-Efficiency to the Forefront of LLM Inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.20306 (2024). arxiv:https:\/\/arXiv.org\/abs\/2403.20306\u00a0[cs.AI] 10.48550\/arXiv.2403.20306","DOI":"10.48550\/arXiv.2403.20306"},{"key":"e_1_3_3_2_75_2","doi-asserted-by":"publisher","DOI":"10.1109\/IEDM.2017.8268306"},{"key":"e_1_3_3_2_76_2","doi-asserted-by":"publisher","unstructured":"Gemini Team. 2024. Gemini: A Family of Highly Capable Multimodal Models. arXiv (2024). arxiv:https:\/\/arXiv.org\/abs\/2312.11805\u00a0[cs.CL] 10.48550\/arXiv.2312.11805","DOI":"10.48550\/arXiv.2312.11805"},{"key":"e_1_3_3_2_77_2","unstructured":"TechInsights. 2025. Samsung 1a 16Gb LPDDR5X DRAM Transistor. https:\/\/www.techinsights.com\/blog\/samsung-1a-16gb-lpddr5x-dram-transistor. Accessed: 2025-02-20."},{"key":"e_1_3_3_2_78_2","unstructured":"Nandan Thakur Nils Reimers Andreas R\u00fcckl\u00e9 Abhishek Srivastava and Iryna Gurevych. 2021. Beir: A heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2104.08663 (2021)."},{"key":"e_1_3_3_2_79_2","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2393378"},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457550"},{"key":"e_1_3_3_2_81_2","doi-asserted-by":"publisher","unstructured":"Mengzhao Wang Xiaoliang Xu Qiang Yue and Yuxiang Wang. 2021. A comprehensive survey and experimental comparison of graph-based approximate nearest neighbor search. Proc. VLDB Endow. 14 11 (July 2021) 1964\u20131978. 10.14778\/3476249.3476255","DOI":"10.14778\/3476249.3476255"},{"key":"e_1_3_3_2_82_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2312.03141"},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3657357"},{"key":"e_1_3_3_2_84_2","doi-asserted-by":"publisher","unstructured":"Lee Xiong Chenyan Xiong Ye Li Kwok-Fung Tang Jialin Liu Paul Bennett Junaid Ahmed and Arnold Overwijk. 2020. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. arXiv (2020). 10.48550\/arXiv.2007.00808 arxiv:https:\/\/arXiv.org\/abs\/2007.00808","DOI":"10.48550\/arXiv.2007.00808"},{"key":"e_1_3_3_2_85_2","unstructured":"Zhaozhuo Xu Weijie Zhao Shulong Tan Zhixin Zhou and Ping Li. 2022. Proximity graph maintenance for fast online nearest neighbor search. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2206.10839 (2022)."},{"key":"e_1_3_3_2_86_2","doi-asserted-by":"publisher","unstructured":"Shi-Qi Yan Jia-Chen Gu Yun Zhu and Zhen-Hua Ling. 2024. Corrective Retrieval Augmented Generation. arXiv (2024). 10.48550\/arXiv.2401.15884 arxiv:https:\/\/arXiv.org\/abs\/2401.15884\u00a0[cs.CL]","DOI":"10.48550\/arXiv.2401.15884"},{"key":"e_1_3_3_2_87_2","doi-asserted-by":"publisher","unstructured":"Wei Yuan and Xi Jin. 2025. FANNS: An FPGA-Based Approximate Nearest-Neighbor Search Accelerator. IEEE Transactions on Very Large Scale Integration (VLSI) Systems 33 4 (2025) 1197\u20131201. 10.1109\/TVLSI.2024.3496589","DOI":"10.1109\/TVLSI.2024.3496589"},{"key":"e_1_3_3_2_88_2","doi-asserted-by":"publisher","unstructured":"Xi Zhao Yao Tian Kai Huang Bolong Zheng and Xiaofang Zhou. 2023. Towards Efficient Index Construction and Approximate Nearest Neighbor Search in High-Dimensional Spaces. Proc. VLDB Endow. 16 8 (April 2023) 1979\u20131991. 10.14778\/3594512.3594527","DOI":"10.14778\/3594512.3594527"},{"key":"e_1_3_3_2_89_2","doi-asserted-by":"publisher","unstructured":"Yun Zhu Jia-Chen Gu Caitlin Sikora Ho Ko Yinxiao Liu Chu-Cheng Lin Lei Shu Liangchen Luo Lei Meng Bang Liu and Jindong Chen. 2024. Accelerating Inference of Retrieval-Augmented Generation via Sparse Context Selection. arXiv (2024). 10.48550\/arXiv.2405.16178","DOI":"10.48550\/arXiv.2405.16178"}],"event":{"name":"ISCA '25: Proceedings of the 52nd Annual International Symposium on Computer Architecture","location":"Tokyo Japan","acronym":"SIGARCH '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 52nd Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731079","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731079","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T11:09:34Z","timestamp":1750504174000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695053.3731079"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,20]]},"references-count":88,"alternative-id":["10.1145\/3695053.3731079","10.1145\/3695053"],"URL":"https:\/\/doi.org\/10.1145\/3695053.3731079","relation":{},"subject":[],"published":{"date-parts":[[2025,6,20]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}