{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T18:43:34Z","timestamp":1774291414651,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,7,18]],"date-time":"2023-07-18T00:00:00Z","timestamp":1689638400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,7,19]]},"DOI":"10.1145\/3539618.3592080","type":"proceedings-article","created":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T00:22:23Z","timestamp":1689726143000},"page":"2476-2480","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Unsupervised Dense Retrieval Training with Web Anchors"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1668-0376","authenticated-orcid":false,"given":"Yiqing","family":"Xie","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8893-366X","authenticated-orcid":false,"given":"Xiao","family":"Liu","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0392-4183","authenticated-orcid":false,"given":"Chenyan","family":"Xiong","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,7,18]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. https:\/\/arxiv.org\/abs\/1611.09268","author":"Bajaj Payal","year":"2016","unstructured":"Payal Bajaj, Daniel Campos, Nick Craswell, Li Deng, Jianfeng Gao, Xiaodong Liu, Rangan Majumder, Andrew McNamara, Bhaskar Mitra, Tri Nguyen, Mir Rosenberg, Xia Song, Alina Stoica, Saurabh Tiwary, and Tong Wang. 2016. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. https:\/\/arxiv.org\/abs\/1611.09268"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531863"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-emnlp.19"},{"key":"e_1_3_2_1_4_1","volume-title":"Davison","author":"Dai Na","year":"2010","unstructured":"Na Dai and Brian D. Davison. 2010. Mining Anchor Text Trends for Retrieval. In Advances in Information Retrieval, Cathal Gurrin, Yulan He, Gabriella Kazai, Udo Kruschwitz, Suzanne Little, Thomas Roelleke, Stefan R\u00fcger, and Keith van Rijsbergen (Eds.). Springer Berlin Heidelberg, Berlin, Heidelberg, 127--139."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Association for Computational Linguistics. https:\/\/aclanthology.org\/N19--1423"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1571941.1571982"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/860435.860550"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 2008 International Conference on Data Mining, DMIN 2008, 667--673","author":"Feng Junlan","year":"2008","unstructured":"Junlan Feng, Valerie Torres, Daniel Sheleheda, and Cynthia Cama. 2008. WebTrack: Mining and Tracking Content and Structure of Websites. Proceedings of the 2008 International Conference on Data Mining, DMIN 2008, 667--673."},{"key":"e_1_3_2_1_10_1","volume-title":"Susan Dumais, Jonathan Grudin, and Steven Poltrock.","author":"Fidel Raya","year":"2000","unstructured":"Raya Fidel, Harry Bruce, Annelise Mark Pejtersen, Susan Dumais, Jonathan Grudin, and Steven Poltrock. 2000. Collaborative Information Retrieval (CIR). The New Review of Information Behaviour Research, Vol. 1 (January 2000), 235--247. https:\/\/www.microsoft.com\/en-us\/research\/publication\/collaborative-information-retrieval-cir\/"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.203"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2505515.2505665"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","unstructured":"Gautier Izacard Mathilde Caron Lucas Hosseini Sebastian Riedel Piotr Bojanowski Armand Joulin and Edouard Grave. 2021. Unsupervised Dense Information Retrieval with Contrastive Learning. https:\/\/doi.org\/10.48550\/ARXIV.2112.09118","DOI":"10.48550\/ARXIV.2112.09118"},{"key":"e_1_3_2_1_14_1","volume-title":"Unsupervised Dense Information Retrieval with Contrastive Learning. Transactions on Machine Learning Research","author":"Izacard Gautier","year":"2022","unstructured":"Gautier Izacard, Mathilde Caron, Lucas Hosseini, Sebastian Riedel, Piotr Bojanowski, Armand Joulin, and Edouard Grave. 2022. Unsupervised Dense Information Retrieval with Contrastive Learning. Transactions on Machine Learning Research (2022). https:\/\/openreview.net\/forum?id=jKN1pXi7b0"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1612"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482286"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","unstructured":"Rui Meng Ye Liu Semih Yavuz Divyansh Agarwal Lifu Tu Ning Yu Jianguo Zhang Meghana Bhat and Yingbo Zhou. 2022. Unsupervised Dense Retrieval Deserves Better Positive Pairs: Scalable Augmentation with Query Extraction and Generation. https:\/\/doi.org\/10.48550\/ARXIV.2212.08841","DOI":"10.48550\/ARXIV.2212.08841"},{"key":"e_1_3_2_1_19_1","volume-title":"Conference on Neural Information Processing Systems.","author":"Meng Yu","year":"2021","unstructured":"Yu Meng, Chenyan Xiong, Payal Bajaj, Saurabh Tiwary, Paul Bennett, Jiawei Han, and Xia Song. 2021. COCO-LM: Correcting and contrasting text sequences for language model pretraining. In Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Arnold Overwijk Chenyan Xiong Xiao Liu Cameron VandenBerg and Jamie Callan. 2022. ClueWeb22: 10 Billion Web Documents with Visual and Semantic Information. https:\/\/doi.org\/10.48550\/ARXIV.2211.15848","DOI":"10.48550\/ARXIV.2211.15848"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1910.10683"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"Ori Ram Gal Shachaf Omer Levy Jonathan Berant and Amir Globerson. 2021. Learning to Retrieve Passages without Supervision. https:\/\/doi.org\/10.48550\/ARXIV.2112.07708","DOI":"10.48550\/ARXIV.2112.07708"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","unstructured":"Stephen Robertson and Hugo Zaragoza. 2009. The Probabilistic Relevance Framework: BM25 and Beyond. (2009). https:\/\/doi.org\/10.1561\/1500000019","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_24_1","unstructured":"Nandan Thakur Nils Reimers Andreas R\u00fcckl\u00e9 Abhishek Srivastava and Iryna Gurevych. 2021. BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2). https:\/\/openreview.net\/forum?id=wCu6T5xFjeJ"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Henry Tsai Jason Riesa Melvin Johnson Naveen Arivazhagan Xin Li and Amelia Archer. 2019. Small and Practical BERT Models for Sequence Labeling. https:\/\/arxiv.org\/abs\/1909.00100","DOI":"10.18653\/v1\/D19-1374"},{"key":"e_1_3_2_1_26_1","volume-title":"Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=zeFrfgyZln","author":"Xiong Lee","year":"2021","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul N. Bennett, Junaid Ahmed, and Arnold Overwijk. 2021. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=zeFrfgyZln"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.551"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835521"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380131"}],"event":{"name":"SIGIR '23: The 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Taipei Taiwan","acronym":"SIGIR '23","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3592080","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539618.3592080","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:38:03Z","timestamp":1750178283000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3592080"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,18]]},"references-count":29,"alternative-id":["10.1145\/3539618.3592080","10.1145\/3539618"],"URL":"https:\/\/doi.org\/10.1145\/3539618.3592080","relation":{},"subject":[],"published":{"date-parts":[[2023,7,18]]},"assertion":[{"value":"2023-07-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}