{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T23:31:34Z","timestamp":1780356694203,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T00:00:00Z","timestamp":1709510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,4]]},"DOI":"10.1145\/3616855.3635691","type":"proceedings-article","created":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T18:18:12Z","timestamp":1709576292000},"page":"1090-1093","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":28,"title":["Vector Search with OpenAI Embeddings: Lucene Is All You Need"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-2740-6120","authenticated-orcid":false,"given":"Jasper","family":"Xian","sequence":"first","affiliation":[{"name":"University of Waterloo, Waterloo, Ontario, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4372-0273","authenticated-orcid":false,"given":"Tommaso","family":"Teofili","sequence":"additional","affiliation":[{"name":"Roma Tre University, Rome, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6296-601X","authenticated-orcid":false,"given":"Ronak","family":"Pradeep","sequence":"additional","affiliation":[{"name":"University of Waterloo, Waterloo, Ontario, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0661-7189","authenticated-orcid":false,"given":"Jimmy","family":"Lin","sequence":"additional","affiliation":[{"name":"University of Waterloo, Waterloo, Ontario, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"A. Asai et al. 2023. Retrieval-based Language Models and Applications. In ACL Tutorials.","DOI":"10.18653\/v1\/2023.acl-tutorials.6"},{"key":"e_1_3_2_1_2_1","volume-title":"MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. arXiv:1611.09268v3","author":"Bajaj P.","year":"2018","unstructured":"P. Bajaj et al. 2018. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. arXiv:1611.09268v3 (2018)."},{"key":"e_1_3_2_1_3_1","unstructured":"Haonan Chen et al. 2023. End-to-End Retrieval with Learned Dense and Sparse Representations Using Lucene. arXiv:2311.18503 (2023)."},{"key":"e_1_3_2_1_4_1","volume-title":"Overview of the TREC 2019 Deep Learning Track. In TREC.","author":"Craswell N.","year":"2019","unstructured":"N. Craswell et al. 2019. Overview of the TREC 2019 Deep Learning Track. In TREC."},{"key":"e_1_3_2_1_5_1","volume-title":"Overview of the TREC 2020 Deep Learning Track. In TREC.","author":"Craswell N.","year":"2020","unstructured":"N. Craswell et al. 2020. Overview of the TREC 2020 Deep Learning Track. In TREC."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"J. Devins et al. 2022. Aligning the Research and Practice of Building Search Applications: Elasticsearch and Pyserini. In WSDM.","DOI":"10.1145\/3488560.3502186"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"T. Formal et al. 2022. From Distillation to Hard Negative Sampling: Making Sparse Neural IR Models More Effective. In SIGIR. 2353--2359.","DOI":"10.1145\/3477495.3531857"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"S. Hofst\"atter et al. 2021. Efficiently Teaching an Effective Dense Retriever with Balanced Topic Aware Sampling. In SIGIR.","DOI":"10.1145\/3404835.3462891"},{"key":"e_1_3_2_1_9_1","unstructured":"G. Izacard et al. 2021. Towards Unsupervised Dense Information Retrieval with Contrastive Learning. arXiv:2112.09118 (2021)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"E. Kamalloo et al. 2023. Evaluating Embedding APIs for Information Retrieval. In ACL Industry Track.","DOI":"10.18653\/v1\/2023.acl-industry.50"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"V. Karpukhin et al. 2020. Dense Passage Retrieval for Open-Domain Question Answering. In EMNLP.","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_13_1","volume-title":"A Proposed Conceptual Framework for a Representational Approach to Information Retrieval. arXiv:2110.01529","author":"Lin J.","year":"2021","unstructured":"J. Lin. 2021. A Proposed Conceptual Framework for a Representational Approach to Information Retrieval. arXiv:2110.01529 (2021)."},{"key":"e_1_3_2_1_14_1","volume-title":"Building a Culture of Reproducibility in Academic Research. arXiv:2212.13534","author":"Lin J.","year":"2022","unstructured":"J. Lin. 2022. Building a Culture of Reproducibility in Academic Research. arXiv:2212.13534 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"Pyserini: A Python Toolkit for Reproducible Information Retrieval Research with Sparse and Dense Representations. In SIGIR.","author":"Lin J.","year":"2021","unstructured":"J. Lin et al. 2021a. Pyserini: A Python Toolkit for Reproducible Information Retrieval Research with Sparse and Dense Representations. In SIGIR."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"S. Lin et al. 2021b. In-Batch Negatives for Knowledge Distillation with Tightly-Coupled Teachers for Dense Retrieval. In RepL4NLP Workshop.","DOI":"10.18653\/v1\/2021.repl4nlp-1.17"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00556"},{"key":"e_1_3_2_1_18_1","volume-title":"ACM Transactions on Information Systems","volume":"41","author":"Lin S.","year":"2023","unstructured":"S. Lin and J. Lin. 2023. A Dense Representation Framework for Lexical and Semantic Matching. ACM Transactions on Information Systems , Vol. 41 (2023). Issue 4."},{"key":"e_1_3_2_1_19_1","volume-title":"DPR: Reproduction of Training and Replication of Retrieval. In ECIR.","author":"Ma X.","year":"2022","unstructured":"X. Ma et al. 2022a. Another Look at DPR: Reproduction of Training and Replication of Retrieval. In ECIR."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"X. Ma et al. 2022b. Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2. In SIGIR.","DOI":"10.1145\/3477495.3531749"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"X. Ma et al. 2023. Anserini Gets Dense Retrieval: Integration of Lucene's HNSW Indexes. In CIKM.","DOI":"10.1145\/3583780.3615112"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889473"},{"key":"e_1_3_2_1_23_1","unstructured":"G. Mialon et al. 2023. Augmented Language Models: a Survey. arXiv:2302.07842 (2023)."},{"key":"e_1_3_2_1_24_1","unstructured":"A. Neelakantan et al. 2022. Text and Code Embeddings by Contrastive Pre-Training. arXiv:2201.10005 (2022)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"A. Pavlo et al. 2009. A Comparison of Approaches to Large-Scale Data Analysis. In SIGMOD.","DOI":"10.1145\/1559845.1559865"},{"key":"e_1_3_2_1_26_1","unstructured":"R. Pradeep et al. 2021. The Expando-Mono-Duo Design Pattern for Text Ranking with Pretrained Sequence-to-Sequence Models. arXiv:2101.05667 (2021)."},{"key":"e_1_3_2_1_27_1","unstructured":"M. Stonebraker and J. M. Hellerstein. 2005. What Goes Around Comes Around."},{"key":"e_1_3_2_1_28_1","unstructured":"L. Xiong et al. 2021. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In ICLR."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3239571"}],"event":{"name":"WSDM '24: The 17th ACM International Conference on Web Search and Data Mining","location":"Merida Mexico","acronym":"WSDM '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 17th ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3635691","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3616855.3635691","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:49:48Z","timestamp":1755823788000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3635691"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,4]]},"references-count":29,"alternative-id":["10.1145\/3616855.3635691","10.1145\/3616855"],"URL":"https:\/\/doi.org\/10.1145\/3616855.3635691","relation":{},"subject":[],"published":{"date-parts":[[2024,3,4]]},"assertion":[{"value":"2024-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}