{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T01:55:19Z","timestamp":1775181319715,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"NWO","award":["VI.Vidi.223.166"],"award-info":[{"award-number":["VI.Vidi.223.166"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,8]]},"DOI":"10.1145\/3673791.3698441","type":"proceedings-article","created":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T06:24:16Z","timestamp":1733639056000},"page":"303-306","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Neural Lexical Search with Learned Sparse Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5970-880X","authenticated-orcid":false,"given":"Andrew","family":"Yates","sequence":"first","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7754-6656","authenticated-orcid":false,"given":"Carlos","family":"Lassance","sequence":"additional","affiliation":[{"name":"Cohere, Grenoble, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8914-2659","authenticated-orcid":false,"given":"Sean","family":"MacAvaney","sequence":"additional","affiliation":[{"name":"University of Glasgow, Glasgow, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0607-0723","authenticated-orcid":false,"given":"Thong","family":"Nguyen","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9558-5548","authenticated-orcid":false,"given":"Yibin","family":"Lei","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,12,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. arXiv preprint arXiv:1611.09268","author":"Bajaj Payal","year":"2018","unstructured":"Payal Bajaj, Daniel Campos, Nick Craswell, Li Deng, Jianfeng Gao, Xiaodong Liu, Rangan Majumder, Andrew McNamara, Bhaskar Mitra, Tri Nguyen, Mir Rosenberg, Xia Song, Alina Stoica, Saurabh Tiwary, and Tong Wang. 2018. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. arXiv preprint arXiv:1611.09268 (2018)."},{"key":"e_1_3_2_1_2_1","first-page":"1","article-title":"An approximate algorithm for maximum inner product search over streaming sparse vectors","volume":"42","author":"Bruch Sebastian","year":"2023","unstructured":"Sebastian Bruch, Franco Maria Nardini, Amir Ingber, and Edo Liberty. 2023. An approximate algorithm for maximum inner product search over streaming sparse vectors. ACM Transactions on Information Systems, Vol. 42, 2 (2023), 1--43.","journal-title":"ACM Transactions on Information Systems"},{"key":"e_1_3_2_1_3_1","volume-title":"Amir Ingber, and Edo Liberty.","author":"Bruch Sebastian","year":"2024","unstructured":"Sebastian Bruch, Franco Maria Nardini, Amir Ingber, and Edo Liberty. 2024. Bridging dense and sparse maximum inner product search. ACM Transactions on Information Systems (2024)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657769"},{"key":"e_1_3_2_1_5_1","volume-title":"Cosimo Rulli, and Rossano Venturini.","author":"Bruch Sebastian","year":"2024","unstructured":"Sebastian Bruch, Franco Maria Nardini, Cosimo Rulli, and Rossano Venturini. 2024. Pairing Clustered Inverted Indexes with kNN Graphs for Fast Approximate Retrieval over Learned Sparse Representations. arXiv preprint arXiv:2408.04443 (2024)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.932"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-emnlp.19"},{"key":"e_1_3_2_1_8_1","volume-title":"Overview of the TREC 2022 Deep Learning Track. In TREC.","author":"Craswell Nick","year":"2022","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, Jimmy Lin, Ellen M. Voorhees, and Ian Soboroff. 2022. Overview of the TREC 2022 Deep Learning Track. In TREC."},{"key":"e_1_3_2_1_9_1","volume-title":"Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M. Voorhees. 2020. Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003.07820 (2020). https:\/\/arxiv.org\/abs\/2003.07820"},{"key":"e_1_3_2_1_10_1","volume-title":"Overview of the TREC 2020 deep learning track. arXiv preprint arXiv:2102","author":"Craswell Nick","year":"2021","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M. Voorhees. 2021. Overview of the TREC 2020 deep learning track. arXiv preprint arXiv:2102.07662 (2021). https:\/\/arxiv.org\/abs\/2102.07662"},{"key":"e_1_3_2_1_11_1","volume-title":"Overview of the TREC 2023 Deep Learning Track. In TREC.","author":"Craswell Nick","year":"2023","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Hossein A. Rahmani, Daniel Campos, Jimmy Lin, Ellen M. Voorhees, and Ian Soboroff. 2023. Overview of the TREC 2023 Deep Learning Track. In TREC."},{"key":"e_1_3_2_1_12_1","volume-title":"Overview of the TREC 2021 Deep Learning Track. In TREC.","author":"Nick","year":"2021","unstructured":"Nick Craswell1, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Jimmy Lin. 2021. Overview of the TREC 2021 Deep Learning Track. In TREC."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/299917.299920"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.4630320609"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1108\/eb026683"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380258"},{"key":"e_1_3_2_1_17_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2433396.2433412"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2009916.2010048"},{"key":"e_1_3_2_1_20_1","volume-title":"Improving retrieval of short texts through document expansion (SIGIR '12)","author":"Efron Miles","unstructured":"Miles Efron, Peter Organisciak, and Katrina Fenlon. 2012. Improving retrieval of short texts through document expansion (SIGIR '12). Association for Computing Machinery, 911--920."},{"key":"e_1_3_2_1_21_1","volume-title":"SPLADE v2: Sparse Lexical and Expansion Model for Information Retrieval. arXiv preprint arXiv:2109.10086","author":"Formal Thibault","year":"2021","unstructured":"Thibault Formal, Carlos Lassance, Benjamin Piwowarski, and St\u00e9phane Clinchant. 2021. SPLADE v2: Sparse Lexical and Expansion Model for Information Retrieval. arXiv preprint arXiv:2109.10086 (2021)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463098"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/32206.32212"},{"key":"e_1_3_2_1_24_1","unstructured":"Zhichao Geng Xinyuan Lu Dagney Braun Charlie Yang and Fanit Kolchina. 2023. Improving document retrieval with sparse semantic encoders. https:\/\/opensearch.org\/blog\/improving-document-retrieval-with-sparse-semantic-encoders\/"},{"key":"e_1_3_2_1_25_1","volume-title":"MultiContrievers: Analysis of Dense Retrieval Representations. arXiv preprint arXiv:2402.15925","author":"Goldfarb-Tarrant Seraphina","year":"2024","unstructured":"Seraphina Goldfarb-Tarrant, Pedro Rodriguez, Jane Dwivedi-Yu, and Patrick Lewis. 2024. MultiContrievers: Analysis of Dense Retrieval Representations. arXiv preprint arXiv:2402.15925 (2024)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/582415.582418"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531833"},{"key":"e_1_3_2_1_29_1","volume-title":"From matching to generation: A survey on generative information retrieval. arXiv preprint arXiv:2404.14851","author":"Li Xiaoxi","year":"2024","unstructured":"Xiaoxi Li, Jiajie Jin, Yujia Zhou, Yuyao Zhang, Peitian Zhang, Yutao Zhu, and Zhicheng Dou. 2024. From matching to generation: A survey on generative information retrieval. arXiv preprint arXiv:2404.14851 (2024)."},{"key":"e_1_3_2_1_30_1","volume-title":"COIL, and a Conceptual Framework for Information Retrieval Techniques. arXiv preprint arXiv:2106.14807","author":"Lin Jimmy","year":"2021","unstructured":"Jimmy Lin and Xueguang Ma. 2021. A Few Brief Notes on DeepImpact, COIL, and a Conceptual Framework for Information Retrieval Techniques. arXiv preprint arXiv:2106.14807 (2021)."},{"key":"e_1_3_2_1_31_1","volume-title":"Pretrained Transformers for Text Ranking: BERT and Beyond. ArXiv","author":"Lin Jimmy","year":"2020","unstructured":"Jimmy Lin, Rodrigo Nogueira, and Andrew Yates. 2020. Pretrained Transformers for Text Ranking: BERT and Beyond. ArXiv, Vol. abs\/2010.06467 (2020). https:\/\/arxiv.org\/abs\/2010.06467"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2808194.2809477"},{"key":"e_1_3_2_1_33_1","unstructured":"Sheng-Chieh Lin and Jimmy Lin. 2023. A Dense Representation Framework for Lexical and Semantic Matching. ACM Trans. Inf. Syst. (2023)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01029"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401262"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the Third International Conference on Design of Experimental Search & Information REtrieval Systems","volume":"34","author":"Mackenzie Joel","year":"2022","unstructured":"Joel Mackenzie, Matthias Petri, and Luke Gallagher. 2022. IOQP: A simple Impact-Ordered Query Processor written in Rust. In Proceedings of the Third International Conference on Design of Experimental Search & Information REtrieval Systems, San Jose, CA, USA, August 30--31, 2022 (CEUR Workshop Proceedings, Vol. 3480), Omar Alonso, Ricardo Baeza-Yates, Tracy Holloway King, and Gianmaria Silvello (Eds.). CEUR-WS.org, 22--34. https:\/\/ceur-ws.org\/Vol-3480\/paper-03.pdf"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/J.IS.2013.10.006"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463030"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080780"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-15712-8_52"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657906"},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Nair Suraj","unstructured":"Suraj Nair, Eugene Yang, Dawn Lawrie, James Mayfield, and Douglas W. Oard. 2023. BLADE: Combining Vocabulary Pruning and Intermediate Pretraining for Scaleable Neural CLIR. In Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval (Taipei, Taiwan) (SIGIR '23). Association for Computing Machinery, 1219--1229."},{"key":"e_1_3_2_1_43_1","volume-title":"Biennial Conference on Design of Experimental Search & Information Retrieval Systems.","author":"Nair Suraj","unstructured":"Suraj Nair, Eugene Yang, Dawn J Lawrie, James Mayfield, and Douglas W. Oard. 2022. Learning a Sparse Representation Model for Neural CLIR. In Biennial Conference on Design of Experimental Search & Information Retrieval Systems."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-56060-6_29"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28241-6_7"},{"key":"e_1_3_2_1_46_1","volume-title":"Document Expansion by Query Prediction. arXiv preprint arXiv:1904.08375","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019. Document Expansion by Query Prediction. arXiv preprint arXiv:1904.08375 (2019)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.83"},{"key":"e_1_3_2_1_48_1","first-page":"109","article-title":"Okapi at TREC-3","volume":"109","author":"Robertson Stephen E","year":"1995","unstructured":"Stephen E Robertson, Steve Walker, Susan Jones, Micheline M Hancock-Beaulieu, Mike Gatford, et al. 1995. Okapi at TREC-3. Nist Special Publication Sp, Vol. 109 (1995), 109.","journal-title":"Nist Special Publication Sp"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/0306-4573(88)90021-0"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/361219.361220"},{"key":"e_1_3_2_1_51_1","volume-title":"Faster, Cheaper and Lighter. arXiv preprint arXiv:1910.01108","author":"Sanh V","year":"2019","unstructured":"V Sanh. 2019. DistilBERT, A Distilled Version of BERT: Smaller, Faster, Cheaper and Lighter. arXiv preprint arXiv:1910.01108 (2019)."},{"key":"e_1_3_2_1_52_1","volume-title":"The Notion of Relevance in Information Science","author":"Saracevic Tefko","unstructured":"Tefko Saracevic. 2016. The Notion of Relevance in Information Science. In Morgan & Claypool Publishers."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599927"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.923"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3624918.3629547"},{"key":"e_1_3_2_1_56_1","first-page":"21831","article-title":"Transformer memory as a differentiable search index","volume":"35","author":"Tay Yi","year":"2022","unstructured":"Yi Tay, Vinh Tran, Mostafa Dehghani, Jianmo Ni, Dara Bahri, Harsh Mehta, Zhen Qin, Kai Hui, Zhe Zhao, Jai Gupta, et al. 2022. Transformer memory as a differentiable search index. Advances in Neural Information Processing Systems, Vol. 35 (2022), 21831--21843.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_57_1","volume-title":"Beir: A heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:2104.08663","author":"Thakur Nandan","year":"2021","unstructured":"Nandan Thakur, Nils Reimers, Andreas R\u00fcckl\u00e9, Abhishek Srivastava, and Iryna Gurevych. 2021. Beir: A heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:2104.08663 (2021)."},{"key":"e_1_3_2_1_58_1","volume-title":"Proceedings of the Eighth Text REtrieval Conference (TREC-8).","author":"Ellen","unstructured":"Ellen M. Voorhees and Dawn M. Tice. 1999. The TREC-8 Question Answering Track Report. In Proceedings of the Eighth Text REtrieval Conference (TREC-8)."},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)","author":"Yamada Ikuya","unstructured":"Ikuya Yamada, Akari Asai, and Hannaneh Hajishirzi. 2021. Efficient Passage Retrieval with Hashing for Open-domain Question Answering. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), Chengqing Zong, Fei Xia, Wenjie Li, and Roberto Navigli (Eds.). Association for Computational Linguistics, Online, 979--986."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657972"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271800"},{"key":"e_1_3_2_1_62_1","volume-title":"Retrieval-based Disentangled Representation Learning with Natural Language Supervision. In The Twelfth International Conference on Learning Representations.","author":"Zhou Jiawei","year":"2024","unstructured":"Jiawei Zhou, Xiaoguang Li, Lifeng Shang, Xin Jiang, Qun Liu, and Lei Chen. 2024. Retrieval-based Disentangled Representation Learning with Natural Language Supervision. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_63_1","volume-title":"PromptReps: Prompting Large Language Models to Generate Dense and Sparse Representations for Zero-Shot Document Retrieval. arXiv preprint arXiv:2404.18424","author":"Zhuang Shengyao","year":"2024","unstructured":"Shengyao Zhuang, Xueguang Ma, Bevan Koopman, Jimmy Lin, and Guido Zuccon. 2024. PromptReps: Prompting Large Language Models to Generate Dense and Sparse Representations for Zero-Shot Document Retrieval. arXiv preprint arXiv:2404.18424 (2024)."},{"key":"e_1_3_2_1_64_1","volume-title":"Fast Passage Re-ranking with Contextualized Exact Term Matching and Efficient Passage Expansion. arXiv preprint arXiv:2108.08513","author":"Zhuang Shengyao","year":"2021","unstructured":"Shengyao Zhuang and Guido Zuccon. 2021. Fast Passage Re-ranking with Contextualized Exact Term Matching and Efficient Passage Expansion. arXiv preprint arXiv:2108.08513 (2021)."}],"event":{"name":"SIGIR-AP 2024: Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region","location":"Tokyo Japan","acronym":"SIGIR-AP 2024","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 2024 Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673791.3698441","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3673791.3698441","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T16:24:24Z","timestamp":1755879864000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673791.3698441"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,8]]},"references-count":64,"alternative-id":["10.1145\/3673791.3698441","10.1145\/3673791"],"URL":"https:\/\/doi.org\/10.1145\/3673791.3698441","relation":{},"subject":[],"published":{"date-parts":[[2024,12,8]]},"assertion":[{"value":"2024-12-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}