{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T18:45:05Z","timestamp":1774291505787,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":88,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657850","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"186-196","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Revisiting Document Expansion and Filtering for Effective First-Stage Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9463-595X","authenticated-orcid":false,"given":"Watheq","family":"Mansour","sequence":"first","affiliation":[{"name":"The University of Queensland, Brisbane, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6711-0955","authenticated-orcid":false,"given":"Shengyao","family":"Zhuang","sequence":"additional","affiliation":[{"name":"CSIRO, Brisbane, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0271-5563","authenticated-orcid":false,"given":"Guido","family":"Zuccon","sequence":"additional","affiliation":[{"name":"The University of Queensland, Brisbane, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7992-4633","authenticated-orcid":false,"given":"Joel","family":"Mackenzie","sequence":"additional","affiliation":[{"name":"The University of Queensland, Brisbane, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.21236\/ADA460118"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591960"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-022-09411-0"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646031"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2019.05.009"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1277741.1277820"},{"key":"e_1_3_2_1_7_1","volume-title":"MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. arXiv:1611.09268v3","author":"Bajaj Payal","year":"2018","unstructured":"Payal Bajaj, Daniel Campos, Nick Craswell, Li Deng, Jianfeng Gao, Xiaodong Liu, Rangan Majumder, Andrew McNamara, Bhaskar Mitra, Tri Nguyen, Mir Rosenberg, Xia Song, Alina Stoica, Saurabh Tiwary, and Tong Wang. 2018. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. arXiv:1611.09268v3 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"Proc. ADC. 69--76","author":"Billerbeck Bodo","year":"2004","unstructured":"Bodo Billerbeck and Justin Zobel. 2004. Questioning query expansion: An examination of behaviour and parameters. In Proc. ADC. 69--76."},{"key":"e_1_3_2_1_9_1","volume-title":"Proc. CLEF Assoc.","author":"Bondarenko Alexander","year":"2020","unstructured":"Alexander Bondarenko, Maik Fr\u00f6be, Meriem Beloucif, Lukas Gienapp, Yamen Ajjour, Alexander Panchenko, Chris Biemann, Benno Stein, Henning Wachsmuth, Martin Potthast, and Matthias Hagen. 2020. Overview of Touch\u00e9 2020: Argument Retrieval. In Proc. CLEF Assoc."},{"key":"e_1_3_2_1_10_1","volume-title":"Manning","author":"Clark Kevin","year":"2020","unstructured":"Kevin Clark, Minh-Thang Luong, Quoc V. Le, and Christopher D. Manning. 2020. ELECTRA: Pre-training text encoders as discriminators rather than generators. In Proc. ICLR."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646059"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/383952.383999"},{"key":"e_1_3_2_1_13_1","volume-title":"Overview of the TREC 2020 deep learning track. In Proc. TREC.","author":"Craswell Nick","year":"2021","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, and Daniel Campos. 2021. Overview of the TREC 2020 deep learning track. In Proc. TREC."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462804"},{"key":"e_1_3_2_1_15_1","volume-title":"Overview of the TREC 2019 deep learning track.","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M. Voorhees. 2020. Overview of the TREC 2019 deep learning track. (2020)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1277741.1277784"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380258"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939862"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2348283.2348405"},{"key":"e_1_3_2_1_20_1","volume-title":"McCurley","author":"Eiron Nadav","year":"2003","unstructured":"Nadav Eiron and Kevin S. McCurley. 2003. Analysis of anchor text for web search. In Proc. SIGIR. 459--460."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1082"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463098"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/32206.32212"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599782"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.241"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-72240-1_26"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28238-6_31"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080751"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00958-7_57"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403305"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911531"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-45442-5_4"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401075"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3592071"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1002\/spe.2203"},{"key":"e_1_3_2_1_36_1","volume-title":"Efficient Neural Ranking using Forward Indexes and Lightweight Encoders. ACM Transactions on Information Systems","author":"Leonhardt Jurek","year":"2023","unstructured":"Jurek Leonhardt, Henrik M\u00fcller, Koustav Rudra, Megha Khosla, Abhijit Anand, and Avishek Anand. 2023. Efficient Neural Ranking using Forward Indexes and Lightweight Encoders. ACM Transactions on Information Systems (2023). Just Accepted."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531884"},{"key":"e_1_3_2_1_38_1","volume-title":"COIL, and a Conceptual Framework for Information Retrieval Techniques. arXiv:2106.14807","author":"Lin Jimmy","year":"2021","unstructured":"Jimmy Lin and Xueguang Ma. 2021. A Few Brief Notes on DeepImpact, COIL, and a Conceptual Framework for Information Retrieval Techniques. arXiv:2106.14807 (2021)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401404"},{"key":"e_1_3_2_1_40_1","volume-title":"Pretrained Transformers for Text Ranking: BERT and Beyond","author":"Lin Jimmy","unstructured":"Jimmy Lin, Rodrigo Nogueira, and Andrew Yates. 2021. Pretrained Transformers for Text Ranking: BERT and Beyond. Morgan & Claypool Publishers."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331339"},{"key":"e_1_3_2_1_42_1","volume-title":"Proc. SIGIR. 3187--3197","author":"Ma Xueguang","year":"2022","unstructured":"Xueguang Ma, Ronak Pradeep, Rodrigo Nogueira, and Jimmy Lin. 2022. Document Expansion Baselines and Learned Sparse Lexical Representations for MSMARCO V1 and V2. In Proc. SIGIR. 3187--3197."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531656"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401262"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463254"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3409256.3409829"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482013"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412000"},{"key":"e_1_3_2_1_49_1","volume-title":"A Sensitivity Analysis of the MSMARCO Passage Collection. arXiv preprint arXiv:2112.03396","author":"Mackenzie Joel","year":"2021","unstructured":"Joel Mackenzie, Matthias Petri, and Alistair Moffat. 2021. A Sensitivity Analysis of the MSMARCO Passage Collection. arXiv preprint arXiv:2112.03396 (2021)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3208902"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576922"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463030"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531774"},{"key":"e_1_3_2_1_54_1","volume-title":"Proc. OSIRRC at SIGIR. 50--56","author":"Mallia Antonio","year":"2019","unstructured":"Antonio Mallia, Micha\u0140 Siedlaczek, Joel Mackenzie, and Torsten Suel. 2019. PISA: Performant Indexes and Search for Academia. In Proc. OSIRRC at SIGIR. 50--56."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-15712-8_23"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28241-6_7"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.63"},{"key":"e_1_3_2_1_58_1","unstructured":"Rodrigo Nogueira and Jimmy Lin. 2019. From doc2query to docTTTTTquery. (2019). https:\/\/cs.uwaterloo.ca\/~jimmylin\/publications\/Nogueira_Lin_2019_ docTTTTTquery-latest.pdf."},{"key":"e_1_3_2_1_59_1","volume-title":"Document expansion by query prediction. arXiv preprint arXiv:1904.08375","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019. Document expansion by query prediction. arXiv preprint arXiv:1904.08375 (2019)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/1871437.1871571"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-99736-6_44"},{"key":"e_1_3_2_1_62_1","volume-title":"Proc. NeurIPS.","author":"Rafailov Rafael","year":"2023","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher D Manning, Stefano Ermon, and Chelsea Finn. 2023. Direct Preference Optimization: Your Language Model is Secretly a Reward Model. In Proc. NeurIPS."},{"key":"e_1_3_2_1_63_1","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. J. Mach. Learn. Res. 21, 140 (2020), 1--67.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911492"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531766"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.20011"},{"key":"e_1_3_2_1_68_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_69_1","volume-title":"Proc. Wkshp. on Representation Learning for NLP. 163--173","author":"Yang Sheng-Chieh Lin Jheng-Hong","year":"2021","unstructured":"Jheng-Hong Yang Sheng-Chieh Lin and Jimmy Lin. 2021. In-Batch Negatives for Knowledge Distillation with Tightly-Coupled Teachers for Dense Retrieval. In Proc. Wkshp. on Representation Learning for NLP. 163--173."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.3115\/1220835.1220887"},{"key":"e_1_3_2_1_71_1","volume-title":"Proc. ReNeuIR at SIGIR","author":"Thakur Nandan","year":"2023","unstructured":"Nandan Thakur, Nils Reimers, and Jimmy Lin. 2023. Injecting Domain Adaptation with Learning-to-hash for Effective and Efficient Zero-shot Dense Retrieval. In Proc. ReNeuIR at SIGIR 2023."},{"key":"e_1_3_2_1_72_1","volume-title":"Proc. NeurIPS.","author":"Thakur Nandan","year":"2021","unstructured":"Nandan Thakur, Nils Reimers, Andreas R\u00fcckl\u00e9, Abhishek Srivastava, and Iryna Gurevych. 2021. BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models. In Proc. NeurIPS."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000057"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1016\/0306-4573(95)00020-H"},{"key":"e_1_3_2_1_75_1","volume-title":"Proc. TREC.","author":"Voorhees Ellen M.","year":"2004","unstructured":"Ellen M. Voorhees. 2004. Overview of the TREC 2004 Robust Retrieval Track. In Proc. TREC."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3451964.3451965","article-title":"TREC-COVID: Constructing a pandemic information retrieval test collection","volume":"54","author":"Voorhees Ellen M.","year":"2021","unstructured":"Ellen M. Voorhees, Tasmeer Alam, Steven Bedrick, Dina Demner-Fushman, William R. Hersh, Kyle Lo, Kirk Roberts, Ian Soboroff, and Lucy Lu Wang. 2021. TREC-COVID: Constructing a pandemic information retrieval test collection. SIGIR Forum 54, 1 (2021), 1--12.","journal-title":"SIGIR Forum"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/2009916.2009934"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3471158.3472233"},{"key":"e_1_3_2_1_79_1","volume-title":"Arman Cohan, and Luca Soldaini.","author":"Weller Orion","year":"2023","unstructured":"Orion Weller, Kyle Lo, David Wadden, Dawn Lawrie, Benjamin Van Durme, Arman Cohan, and Luca Soldaini. 2023. When do Generative Query and Document Expansions Fail? A Comprehensive Study Across Methods, Retrievers, and Datasets. arXiv preprint arXiv:2309.08541 (2023)."},{"key":"e_1_3_2_1_80_1","volume-title":"Proc. TREC. 663--672","author":"Westerveld Thijs","year":"2002","unstructured":"Thijs Westerveld, Wessel Kraaij, and Djoerd Hiemstra. 2002. Retrieving Web Pages using Content, Links, URLs and Anchors. In Proc. TREC. 663--672."},{"key":"e_1_3_2_1_81_1","article-title":"Anserini: Reproducible Ranking Baselines Using Lucene","volume":"10","author":"Yang Peilin","year":"2018","unstructured":"Peilin Yang, Hui Fang, and Jimmy Lin. 2018. Anserini: Reproducible Ranking Baselines Using Lucene. J. Data Inf. Qual. 10, 4 (2018).","journal-title":"J. Data Inf. Qual."},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48051-0_21"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/1871437.1871474"},{"key":"e_1_3_2_1_84_1","volume-title":"A Setwise Approach for Effective and Highly Efficient Zero-shot Ranking with Large Language Models. arXiv preprint arXiv:2310.09497","author":"Zhuang Shengyao","year":"2023","unstructured":"Shengyao Zhuang, Honglei Zhuang, Bevan Koopman, and Guido Zuccon. 2023. A Setwise Approach for Effective and Highly Efficient Zero-shot Ranking with Large Language Models. arXiv preprint arXiv:2310.09497 (2023)."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.225"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462922"},{"key":"e_1_3_2_1_87_1","volume-title":"Proc. ReNeuIR at SIGIR.","author":"Zhuang Shengyao","year":"2022","unstructured":"Shengyao Zhuang and Guido Zuccon. 2022. Fast Passage Re-ranking with Contex-tualized Exact Term Matching and Efficient Passage Expansion. In Proc. ReNeuIR at SIGIR."},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1145\/1132956.1132959"}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Washington DC USA","acronym":"SIGIR 2024","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657850","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657850","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:19:04Z","timestamp":1755839944000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657850"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":88,"alternative-id":["10.1145\/3626772.3657850","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657850","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}