{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T02:03:57Z","timestamp":1771466637673,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T00:00:00Z","timestamp":1724112000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,20]]},"DOI":"10.1145\/3685650.3685658","type":"proceedings-article","created":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T10:19:28Z","timestamp":1726654768000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["LexBoost: Improving Lexical Document Retrieval with Nearest Neighbors"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-8900-3118","authenticated-orcid":false,"given":"Hrishikesh","family":"Kulkarni","sequence":"first","affiliation":[{"name":"Georgetown University, Washington, DC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6349-5237","authenticated-orcid":false,"given":"Nazli","family":"Goharian","sequence":"additional","affiliation":[{"name":"Georgetown University, Washington, DC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5076-8171","authenticated-orcid":false,"given":"Ophir","family":"Frieder","sequence":"additional","affiliation":[{"name":"Georgetown University, Washington, DC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8914-2659","authenticated-orcid":false,"given":"Sean","family":"MacAvaney","sequence":"additional","affiliation":[{"name":"University of Glasgow, Glasgow, UK"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,9,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Nasreen Abdul-Jaleel et al. 2004. UMass at TREC 2004: Novelty and HARD. Computer Science Department Faculty Publication Series 189. (2004).","DOI":"10.21236\/ADA460118"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the ACM Symposium on Document Engineering 2023","author":"Antonio","unstructured":"Antonio Acquavia et al. 2023. Static Pruning for Multi-Representation Dense Retrieval. In Proceedings of the ACM Symposium on Document Engineering 2023 (Limerick, Ireland) (DocEng '23)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/582415.582416"},{"key":"e_1_3_2_1_4_1","volume-title":"MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In InCoCo@NIPS.","author":"Payal Bajaj","year":"2016","unstructured":"Payal Bajaj et al. 2016. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In InCoCo@NIPS."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(199806)49:8<742::AID-ASI8>3.0.CO;2-H"},{"key":"e_1_3_2_1_6_1","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Tom","year":"1877","unstructured":"Tom Brown et al. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems, Vol. 33. Curran Associates, Inc., 1877--1901."},{"key":"e_1_3_2_1_7_1","article-title":"An Analysis of Fusion Functions for Hybrid Retrieval","volume":"42","author":"Sebastian Bruch","year":"2023","unstructured":"Sebastian Bruch et al. 2023. An Analysis of Fusion Functions for Hybrid Retrieval. ACM Trans. Inf. Syst. 42, 1, Article 20 (aug 2023).","journal-title":"ACM Trans. Inf. Syst."},{"key":"e_1_3_2_1_8_1","volume-title":"Article 1 (jan","author":"Carpineto Claudio","year":"2012","unstructured":"Claudio Carpineto and Giovanni Romano. 2012. A Survey of Automatic Query Expansion in Information Retrieval. ACM Comput. Surv. 44, 1, Article 1 (jan 2012)."},{"key":"e_1_3_2_1_9_1","volume-title":"DREQ: Document Re-ranking Using Entity-Based Query Understanding. In Advances in Information Retrieval: 46th European Conference on Information Retrieval, ECIR 2024","author":"Shubham","year":"2024","unstructured":"Shubham Chatterjee et al. 2024. DREQ: Document Re-ranking Using Entity-Based Query Understanding. In Advances in Information Retrieval: 46th European Conference on Information Retrieval, ECIR 2024, Glasgow, UK, March 24-28, 2024."},{"key":"e_1_3_2_1_10_1","volume-title":"InDi: Informative and Diverse Sampling for Dense Retrieval. In Advances in Information Retrieval: 46th European Conference on Information Retrieval, ECIR 2024","author":"Nachshon","year":"2024","unstructured":"Nachshon Cohen et al. 2024. InDi: Informative and Diverse Sampling for Dense Retrieval. In Advances in Information Retrieval: 46th European Conference on Information Retrieval, ECIR 2024, Glasgow, UK, March 24-28, 2024."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2003.07820"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2102.07662"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics)","author":"Jacob","unstructured":"Jacob Devlin et al. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics). Minneapolis, Minnesota, 4171--4186."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1002\/9780470033647"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Sebastian","unstructured":"Sebastian Hofst\u00e4tter et al. 2021. Efficiently Teaching an Effective Dense Retriever with Balanced Topic Aware Sampling. In Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (Virtual Event, Canada) (SIGIR '21). 113--122."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 22nd ACM International Conference on Information & Knowledge Management","author":"Po-Sen","unstructured":"Po-Sen Huang et al. 2013. Learning deep structured semantic models for web search using clickthrough data. In Proceedings of the 22nd ACM International Conference on Information & Knowledge Management (San Francisco, California, USA) (CIKM '13). 2333--2338."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/0020-0271(71)90051-9"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the ACM Symposium on Document Engineering 2023","author":"Hrishikesh","unstructured":"Hrishikesh Kulkarni et al. 2023. Genetic Generative Information Retrieval. In Proceedings of the ACM Symposium on Document Engineering 2023 (Limerick, Ireland) (DocEng '23). Article 8, 4 pages."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Hrishikesh","unstructured":"Hrishikesh Kulkarni et al. 2023. Lexically-Accelerated Dense Retrieval. In Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval (Taipei, Taiwan) (SIGIR '23). 152--162."},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Haitao","unstructured":"Haitao Li et al. 2023. Constructing Tree-based Index for Efficient and Effective Dense Retrieval. In Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval (Taipei, Taiwan) (SIGIR '23). 131--140."},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 6th Workshop on Representation Learning for NLP (RepL4NLP-2021)","author":"Sheng-Chieh","unstructured":"Sheng-Chieh Lin et al. 2021. In-Batch Negatives for Knowledge Distillation with Tightly-Coupled Teachers for Dense Retrieval. In Proceedings of the 6th Workshop on Representation Learning for NLP (RepL4NLP-2021). Association for Computational Linguistics, Online, 163--173."},{"key":"e_1_3_2_1_22_1","volume-title":"Web usage mining. Web Data Mining: Exploring Hyperlinks, Contents, and Usage Data","author":"Liu Bing","year":"2007","unstructured":"Bing Liu. 2007. Web usage mining. Web Data Mining: Exploring Hyperlinks, Contents, and Usage Data (2007), 449--483."},{"key":"e_1_3_2_1_23_1","volume-title":"31st ACM International Conference on Information and Knowledge Management.","author":"Sean","unstructured":"Sean MacAvaney et al. 2022. Adaptive Re-Ranking with a Corpus Graph. In 31st ACM International Conference on Information and Knowledge Management."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889473"},{"key":"e_1_3_2_1_25_1","volume-title":"Annual International ACM SIGIR Conference on Research and Development in Information Retrieval.","author":"Metzler Donald","unstructured":"Donald Metzler and W. Bruce Croft. 2005. A Markov random field model for term dependencies. In Annual International ACM SIGIR Conference on Research and Development in Information Retrieval."},{"key":"e_1_3_2_1_26_1","unstructured":"Tomas Mikolov et al. 2013. Efficient Estimation of Word Representations in Vector Space. In ICLR."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1561\/9781680835335"},{"key":"e_1_3_2_1_28_1","volume-title":"CEQE: Contextualized Embeddings for Query Expansion. In Advances in Information Retrieval: 43rd European Conference on IR Research, ECIR 2021. Springer-Verlag","author":"Shahrzad","unstructured":"Shahrzad Naseri et al. 2021. CEQE: Contextualized Embeddings for Query Expansion. In Advances in Information Retrieval: 43rd European Conference on IR Research, ECIR 2021. Springer-Verlag, Berlin, Heidelberg, 467--482."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Sameh Neji et al. 2021. HyRa: An Effective Hybrid Ranking Model. Procedia Comput. Sci. 192 C (Jan 2021) 1111--1120.","DOI":"10.1016\/j.procs.2021.08.114"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","unstructured":"Liang Pang et al. 2016. A Study of MatchPyramid Models on Ad-hoc Retrieval. https:\/\/doi.org\/10.48550\/ARXIV.1606.04648","DOI":"10.48550\/ARXIV.1606.04648"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the Thirteenth ACM International Conference on Information and Knowledge Management (Washington, D.C., USA) (CIKM '04)","author":"Stephen","unstructured":"Stephen Robertson et al. 2004. Simple BM25 extension to multiple weighted fields. In Proceedings of the Thirteenth ACM International Conference on Information and Knowledge Management (Washington, D.C., USA) (CIKM '04). 42--49."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_33_1","volume-title":"The Smart retrieval system - experiments in automatic document processing","author":"Rocchio J. J.","unstructured":"J. J. Rocchio. 1971. Relevance feedback in information retrieval. In The Smart retrieval system - experiments in automatic document processing, G. Salton (Ed.). Englewood Cliffs, NJ: Prentice-Hall, 313--323."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the 23rd International Conference on World Wide Web","author":"Yelong","unstructured":"Yelong Shen et al. 2014. Learning semantic representations using convolutional neural networks for web search. In Proceedings of the 23rd International Conference on World Wide Web (Seoul, Korea) (WWW '14 Companion). 373--374."},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings Ninth IEEE International Conference on Computer Vision. 1470--1477","volume":"2","author":"Zisserman Sivic","year":"2003","unstructured":"Sivic and Zisserman. 2003. Video Google: a text retrieval approach to object matching in videos. In Proceedings Ninth IEEE International Conference on Computer Vision. 1470--1477 vol.2."},{"key":"e_1_3_2_1_36_1","volume-title":"Overview of TREC 2021. In 30th Text REtrieval Conference","author":"Soboroff Ian","year":"2021","unstructured":"Ian Soboroff. 2021. Overview of TREC 2021. In 30th Text REtrieval Conference. Gaithersburg, Maryland."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the 18th ACM Conference on Information and Knowledge Management","author":"Krysta","unstructured":"Krysta M. Svore and Christopher J.C. Burges. 2009. A machine learning approach for improved BM25 retrieval. In Proceedings of the 18th ACM Conference on Information and Knowledge Management (Hong Kong, China) (CIKM '09). 4 pages."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.2934"},{"key":"e_1_3_2_1_39_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Ashish","unstructured":"Ashish Vaswani et al. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, Vol. 30."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Ellen Voorhees et al. 2021. TREC-COVID: Constructing a Pandemic Information Retrieval Test Collection. SIGIR Forum 54 1 Article 1 (feb 2021) 12 pages.","DOI":"10.1145\/3451964.3451965"},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the 1st Workshop on NLP for COVID-19 at ACL","author":"Lu Lucy","year":"2020","unstructured":"Lucy Lu Wang et al. 2020. CORD-19: The COVID-19 Open Research Dataset. In Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020. Online."},{"key":"e_1_3_2_1_42_1","article-title":"ColBERT-PRF: Semantic Pseudo-Relevance Feedback for Dense Passage and Document Retrieval","volume":"17","author":"Xiao Wang","year":"2023","unstructured":"Xiao Wang et al. 2023. ColBERT-PRF: Semantic Pseudo-Relevance Feedback for Dense Passage and Document Retrieval. ACM Trans. Web 17, 1, Article 3 (jan 2023), 39 pages.","journal-title":"ACM Trans. Web"},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the 2014 ACM Symposium on Document Engineering","author":"Kyle","unstructured":"Kyle Williams et al. 2014. SimSeerX: a similar document search engine. In Proceedings of the 2014 ACM Symposium on Document Engineering (Fort Collins, Colorado, USA) (DocEng '14). 143--146."},{"key":"e_1_3_2_1_44_1","unstructured":"Lee Xiong et al. 2020. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In ICLR."},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 30th ACM International Conference on Information & Knowledge Management (Virtual Event","unstructured":"HongChien Yu et al. 2021. Improving Query Representations for Dense Retrieval with Pseudo Relevance Feedback. In Proceedings of the 30th ACM International Conference on Information & Knowledge Management (Virtual Event, Queensland, Australia) (CIKM '21). 3592--3596."},{"key":"e_1_3_2_1_46_1","volume-title":"Workshops and Demonstrations - Florence, Italy.","volume":"7583","author":"Jiangbo","unstructured":"Jiangbo Yuan et al. 2012. Efficient Mining of Repetitions in Large-Scale TV Streams with Product Quantization Hashing. In Computer Vision - ECCV 2012. Workshops and Demonstrations - Florence, Italy., Vol. 7583. Springer, 271--280."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/502585.502654"}],"event":{"name":"DocEng '24: ACM Symposium on Document Engineering 2024","location":"San Jose CA USA","acronym":"DocEng '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Symposium on Document Engineering 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3685650.3685658","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3685650.3685658","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:06:18Z","timestamp":1750291578000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3685650.3685658"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,20]]},"references-count":47,"alternative-id":["10.1145\/3685650.3685658","10.1145\/3685650"],"URL":"https:\/\/doi.org\/10.1145\/3685650.3685658","relation":{},"subject":[],"published":{"date-parts":[[2024,8,20]]},"assertion":[{"value":"2024-09-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}