{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T18:14:57Z","timestamp":1763748897995,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583294","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:51Z","timestamp":1682551851000},"page":"3203-3213","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["LED: Lexicon-Enlightened Dense Retriever for Large-Scale Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3850-5429","authenticated-orcid":false,"given":"Kai","family":"Zhang","sequence":"first","affiliation":[{"name":"The Ohio State University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4162-2119","authenticated-orcid":false,"given":"Chongyang","family":"Tao","sequence":"additional","affiliation":[{"name":"Microsoft, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3315-2468","authenticated-orcid":false,"given":"Tao","family":"Shen","sequence":"additional","affiliation":[{"name":"University of Technology Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1949-5715","authenticated-orcid":false,"given":"Can","family":"Xu","sequence":"additional","affiliation":[{"name":"Microsoft, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6477-7933","authenticated-orcid":false,"given":"Xiubo","family":"Geng","sequence":"additional","affiliation":[{"name":"Microsoft, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4710-0095","authenticated-orcid":false,"given":"Binxing","family":"Jiao","sequence":"additional","affiliation":[{"name":"Microsoft, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6657-5806","authenticated-orcid":false,"given":"Daxin","family":"Jiang","sequence":"additional","affiliation":[{"name":"Microsoft, China"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Injecting the BM25 Score as Text Improves BERT-Based Re-rankers. arXiv preprint arXiv:2301.09728","author":"Askari Arian","year":"2023","unstructured":"Arian Askari, Amin Abolghasemi, Gabriella Pasi, Wessel Kraaij, and Suzan Verberne. 2023. Injecting the BM25 Score as Text Improves BERT-Based Re-rankers. arXiv preprint arXiv:2301.09728 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102363"},{"key":"e_1_3_2_1_3_1","volume-title":"Semantic Models for the First-stage Retrieval: A Comprehensive Review. CoRR abs\/2103.04831","author":"Cai Yinqiong","year":"2021","unstructured":"Yinqiong Cai, Yixing Fan, Jiafeng Guo, Fei Sun, Ruqing Zhang, and Xueqi Cheng. 2021. Semantic Models for the First-stage Retrieval: A Comprehensive Review. CoRR abs\/2103.04831 (2021). arXiv:2103.04831https:\/\/arxiv.org\/abs\/2103.04831"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273513"},{"key":"e_1_3_2_1_5_1","volume-title":"Salient Phrase Aware Dense Retrieval: Can a Dense Retriever Imitate a Sparse One?CoRR abs\/2110.06918","author":"Chen Xilun","year":"2021","unstructured":"Xilun Chen, Kushal Lakhotia, Barlas Oguz, Anchit Gupta, Patrick S.\u00a0H. Lewis, Stan Peshterliev, Yashar Mehdad, Sonal Gupta, and Wen-tau Yih. 2021. Salient Phrase Aware Dense Retrieval: Can a Dense Retriever Imitate a Sparse One?CoRR abs\/2110.06918 (2021). arXiv:2110.06918https:\/\/arxiv.org\/abs\/2110.06918"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.6028\/NIST.SP.1266.deep-overview"},{"key":"e_1_3_2_1_7_1","volume-title":"Overview of the TREC 2019 deep learning track. CoRR abs\/2003","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen\u00a0M. Voorhees. 2020. Overview of the TREC 2019 deep learning track. CoRR abs\/2003.07820 (2020). arXiv:2003.07820https:\/\/arxiv.org\/abs\/2003.07820"},{"key":"e_1_3_2_1_8_1","volume-title":"CoRR abs\/1910.10687","author":"Dai Zhuyun","year":"2019","unstructured":"Zhuyun Dai and Jamie Callan. 2019. Context-Aware Sentence\/Passage Term Importance Estimation For First Stage Retrieval. CoRR abs\/1910.10687 (2019). arXiv:1910.10687http:\/\/arxiv.org\/abs\/1910.10687"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080832"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_11_1","volume-title":"SPLADE v2: Sparse Lexical and Expansion Model for Information Retrieval. CoRR abs\/2109.10086","author":"Formal Thibault","year":"2021","unstructured":"Thibault Formal, Carlos Lassance, Benjamin Piwowarski, and St\u00e9phane Clinchant. 2021. SPLADE v2: Sparse Lexical and Expansion Model for Information Retrieval. CoRR abs\/2109.10086 (2021). arXiv:2109.10086https:\/\/arxiv.org\/abs\/2109.10086"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463098"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.75"},{"key":"e_1_3_2_1_14_1","volume-title":"Unsupervised Corpus Aware Language Model Pre-training for Dense Passage Retrieval. CoRR abs\/2108.05540","author":"Gao Luyu","year":"2021","unstructured":"Luyu Gao and Jamie Callan. 2021. Unsupervised Corpus Aware Language Model Pre-training for Dense Passage Retrieval. CoRR abs\/2108.05540 (2021). arXiv:2108.05540https:\/\/arxiv.org\/abs\/2108.05540"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.241"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"e_1_3_2_1_17_1","volume-title":"Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation. CoRR abs\/2010.02666","author":"Hofst\u00e4tter Sebastian","year":"2020","unstructured":"Sebastian Hofst\u00e4tter, Sophia Althammer, Michael Schr\u00f6der, Mete Sertkan, and Allan Hanbury. 2020. Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation. CoRR abs\/2010.02666 (2020). arXiv:2010.02666https:\/\/arxiv.org\/abs\/2010.02666"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462891"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403305"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401075"},{"key":"e_1_3_2_1_23_1","volume-title":"Leveraging Semantic and Lexical Matching to Improve the Recall of Document Retrieval Systems: A Hybrid Approach. CoRR abs\/2010.01195","author":"Kuzi Saar","year":"2020","unstructured":"Saar Kuzi, Mingyang Zhang, Cheng Li, Michael Bendersky, and Marc Najork. 2020. Leveraging Semantic and Lexical Matching to Improve the Recall of Document Retrieval Systems: A Hybrid Approach. CoRR abs\/2010.01195 (2020). arXiv:2010.01195https:\/\/arxiv.org\/abs\/2010.01195"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_25_1","volume-title":"Pseudo Relevance Feedback with Deep Language Models and Dense Retrievers: Successes and Pitfalls. CoRR abs\/2108.11044","author":"Li Hang","year":"2021","unstructured":"Hang Li, Ahmed Mourad, Shengyao Zhuang, Bevan Koopman, and Guido Zuccon. 2021. Pseudo Relevance Feedback with Deep Language Models and Dense Retrievers: Successes and Pitfalls. CoRR abs\/2108.11044 (2021). arXiv:2108.11044https:\/\/arxiv.org\/abs\/2108.11044"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531884"},{"key":"e_1_3_2_1_27_1","volume-title":"COIL, and a Conceptual Framework for Information Retrieval Techniques. CoRR abs\/2106.14807","author":"Lin Jimmy","year":"2021","unstructured":"Jimmy Lin and Xueguang Ma. 2021. A Few Brief Notes on DeepImpact, COIL, and a Conceptual Framework for Information Retrieval Techniques. CoRR abs\/2106.14807 (2021). arXiv:2106.14807https:\/\/arxiv.org\/abs\/2106.14807"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2208.00511"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.repl4nlp-1.17"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2209.13335"},{"key":"e_1_3_2_1_31_1","volume-title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs\/1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs\/1907.11692 (2019). arXiv:1907.11692http:\/\/arxiv.org\/abs\/1907.11692"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00369"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.92"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463030"},{"key":"e_1_3_2_1_35_1","unstructured":"Tri Nguyen Mir Rosenberg Xia Song Jianfeng Gao Saurabh Tiwary Rangan Majumder and Li Deng. 2016. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In Proceedings of the Workshop on Cognitive Computation: Integrating neural and symbolic approaches 2016 co-located with the 30th Annual Conference on Neural Information Processing Systems (NIPS 2016) Barcelona Spain December 9 2016(CEUR Workshop Proceedings Vol.\u00a01773) Tarek\u00a0Richard Besold Antoine Bordes Artur\u00a0S. d\u2019Avila Garcez and Greg Wayne (Eds.). CEUR-WS.org. http:\/\/ceur-ws.org\/Vol-1773\/CoCoNIPS_2016_paper9.pdf"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_37_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_38_1","volume-title":"RankDistil: Knowledge Distillation for Ranking. In The 24th International Conference on Artificial Intelligence and Statistics, AISTATS 2021","author":"Reddi J.","year":"2021","unstructured":"Sashank\u00a0J. Reddi, Rama\u00a0Kumar Pasumarthi, Aditya\u00a0Krishna Menon, Ankit\u00a0Singh Rawat, Felix\u00a0X. Yu, Seungyeon Kim, Andreas Veit, and Sanjiv Kumar. 2021. RankDistil: Knowledge Distillation for Ranking. In The 24th International Conference on Artificial Intelligence and Statistics, AISTATS 2021, April 13-15, 2021, Virtual Event(Proceedings of Machine Learning Research, Vol.\u00a0130), Arindam Banerjee and Kenji Fukumizu (Eds.). PMLR, 2368\u20132376. http:\/\/proceedings.mlr.press\/v130\/reddi21a.html"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.191"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.224"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_42_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs\/1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs\/1910.01108 (2019). arXiv:1910.01108http:\/\/arxiv.org\/abs\/1910.01108"},{"key":"e_1_3_2_1_43_1","volume-title":"ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction. CoRR abs\/2112.01488","author":"Santhanam Keshav","year":"2021","unstructured":"Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, Christopher Potts, and Matei Zaharia. 2021. ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction. CoRR abs\/2112.01488 (2021). arXiv:2112.01488https:\/\/arxiv.org\/abs\/2112.01488"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2205.11194"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6428"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3471158.3472233"},{"key":"e_1_3_2_1_48_1","volume-title":"Contextual mask auto-encoder for dense passage retrieval. arXiv preprint arXiv:2208.07670","author":"Wu Xing","year":"2022","unstructured":"Xing Wu, Guangyuan Ma, Meng Lin, Zijia Lin, Zhongyuan Wang, and Songlin Hu. 2022. Contextual mask auto-encoder for dense passage retrieval. arXiv preprint arXiv:2208.07670 (2022)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2204.00185"},{"key":"e_1_3_2_1_50_1","volume-title":"Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In 9th International Conference on Learning Representations, ICLR 2021","author":"Xiong Lee","year":"2021","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul\u00a0N. Bennett, Junaid Ahmed, and Arnold Overwijk. 2021. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=zeFrfgyZln"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531791"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462880"},{"key":"e_1_3_2_1_54_1","volume-title":"Adversarial Retriever-Ranker for Dense Text Retrieval. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=MR7XubKUFB","author":"Zhang Hang","year":"2022","unstructured":"Hang Zhang, Yeyun Gong, Yelong Shen, Jiancheng Lv, Nan Duan, and Weizhu Chen. 2022. Adversarial Retriever-Ranker for Dense Text Retrieval. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=MR7XubKUFB"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3285029"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","unstructured":"Yucheng Zhou Tao Shen Xiubo Geng Chongyang Tao Can Xu Guodong Long Binxing Jiao and Daxin Jiang. 2022. Towards Robust Ranker for Text Retrieval. https:\/\/doi.org\/10.48550\/ARXIV.2206.08063","DOI":"10.48550\/ARXIV.2206.08063"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467147"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Austin TX USA","acronym":"WWW '23"},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583294","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583294","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:22Z","timestamp":1750178242000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583294"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":57,"alternative-id":["10.1145\/3543507.3583294","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583294","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}