{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:06:45Z","timestamp":1765544805189,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,7,11]],"date-time":"2021-07-11T00:00:00Z","timestamp":1625961600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Beijing Academy of Artificial Intelligence","award":["BAAI2019ZD0306"],"award-info":[{"award-number":["BAAI2019ZD0306"]}]},{"name":"the Foundation and Frontier Research Key Program of Chongqing Science and Technology Commission","award":["cstc2017jcyjBX0059"],"award-info":[{"award-number":["cstc2017jcyjBX0059"]}]},{"name":"the Youth Innovation Promotion Association CAS","award":["20144310, 2016102, and 2021100"],"award-info":[{"award-number":["20144310, 2016102, and 2021100"]}]},{"name":"the Lenovo-CAS Joint Lab Youth Scientist Project"},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62006218, 61902381, 61773362, and 61872338"],"award-info":[{"award-number":["62006218, 61902381, 61773362, and 61872338"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the K.C.Wong Education Foundation"},{"name":"the National Key RD Program of China","award":["2016QY02D0405"],"award-info":[{"award-number":["2016QY02D0405"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,7,11]]},"DOI":"10.1145\/3404835.3462869","type":"proceedings-article","created":{"date-parts":[[2021,7,12]],"date-time":"2021-07-12T02:41:54Z","timestamp":1626057714000},"page":"1513-1522","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":32,"title":["B-PROP"],"prefix":"10.1145","author":[{"given":"Xinyu","family":"Ma","sequence":"first","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiafeng","family":"Guo","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruqing","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yixing","family":"Fan","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yingyan","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xueqi","family":"Cheng","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,7,11]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"G. Amati and C. Rijsbergen. 2002. Probabilistic models of information retrieval based on measuring the divergence from randomness. ACM Trans. Inf. Syst. (2002) 357--389.","DOI":"10.1145\/582415.582416"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390334.1390517"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1277741.1277820"},{"key":"e_1_3_2_2_4_1","volume-title":"Mann et.al","author":"Brown T.","year":"2020","unstructured":"T. Brown and B. Mann et.al. 2020. Language Models are Few-Shot Learners. ArXiv , Vol. abs\/2005.14165 (2020)."},{"key":"e_1_3_2_2_5_1","volume-title":"MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. ArXiv","author":"Campos Daniel Fernando","year":"2016","unstructured":"Daniel Fernando Campos, T. Nguyen, M. Rosenberg, Xia Song, Jianfeng Gao, Saurabh Tiwary, Rangan Majumder, L. Deng, and Bhaskar Mitra. 2016. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. ArXiv , Vol. abs\/1611.09268 (2016)."},{"key":"e_1_3_2_2_6_1","volume-title":"Pre-training Tasks for Embedding-based Large-scale Retrieval. International Conference on Learning Representations","author":"Chang Wei-Cheng","year":"2019","unstructured":"Wei-Cheng Chang, F. Yu, Yin-Wen Chang, Yiming Yang, and S. Kumar. 2019. Pre-training Tasks for Embedding-based Large-scale Retrieval. International Conference on Learning Representations (2019)."},{"volume-title":"Overview of the TREC 2004 Terabyte Track. In TREC .","author":"Clarke C.","key":"e_1_3_2_2_7_1","unstructured":"C. Clarke, Nick Craswell, and I. Soboroff. 2004. Overview of the TREC 2004 Terabyte Track. In TREC ."},{"volume-title":"Overview of the TREC 2009 Web Track. In TREC .","author":"Clarke C.","key":"e_1_3_2_2_8_1","unstructured":"C. Clarke, Nick Craswell, and I. Soboroff. 2009. Overview of the TREC 2009 Web Track. In TREC ."},{"key":"e_1_3_2_2_9_1","volume-title":"Overview of the TREC 2019 deep learning track. ArXiv","volume":"2003","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell, Bhaskar Mitra, E. Yilmaz, Daniel Fernando Campos, and E. Voorhees. 2020. Overview of the TREC 2019 deep learning track. ArXiv , Vol. abs\/2003.07820 (2020)."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331303"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159659"},{"key":"e_1_3_2_2_12_1","volume-title":"Bert: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. The 58th Annual Meeting of the Association for Computational Linguistics, Stroudsburg, PA, USA, 4171--4186."},{"key":"e_1_3_2_2_13_1","volume-title":"Modeling Diverse Relevance Patterns in Ad-hoc Retrieval. The 41st International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Fan Y.","year":"2018","unstructured":"Y. Fan, J. Guo, Yanyan Lan, J. Xu, ChengXiang Zhai, and X. Cheng. 2018. Modeling Diverse Relevance Patterns in Ad-hoc Retrieval. The 41st International ACM SIGIR Conference on Research and Development in Information Retrieval (2018)."},{"key":"e_1_3_2_2_14_1","volume-title":"Span Selection Pre-training for Question Answering. The 58th Annual Meeting of the Association for Computational Linguistics","author":"Gla\u00df M.","year":"2020","unstructured":"M. Gla\u00df, A. Gliozzo, Rishav Chakravarti, Anthony Ferritto, Lin Pan, G P Shrivatsa Bhargav, Dinesh Garg, and A. Sil. 2020. Span Selection Pre-training for Question Answering. The 58th Annual Meeting of the Association for Computational Linguistics (2020)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983323.2983769"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331403"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.567"},{"key":"e_1_3_2_2_18_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2015","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1030"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1612"},{"key":"e_1_3_2_2_21_1","unstructured":"Jimmy Lin Rodrigo Nogueira and A. Yates. 2020. Pretrained Transformers for Text Ranking: BERT and Beyond. ArXiv Vol. abs\/2010.06467 (2020)."},{"key":"e_1_3_2_2_22_1","volume-title":"PROP: Pre-training with Representative Words Prediction for Ad-hoc Retrieval. ArXiv","author":"Ma Xinyu","year":"2020","unstructured":"Xinyu Ma, Jiafeng Guo, Ruqing Zhang, Yixing Fan, Xiang Ji, and Xueqi Cheng. 2020. PROP: Pre-training with Representative Words Prediction for Ad-hoc Retrieval. ArXiv , Vol. abs\/2010.10137 (2020)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331317"},{"key":"e_1_3_2_2_24_1","volume-title":"Advances in Pre-Training Distributed Word Representations. ArXiv","author":"Mikolov Tomas","year":"2018","unstructured":"Tomas Mikolov, E. Grave, P. Bojanowski, Christian Puhrsch, and Armand Joulin. 2018. Advances in Pre-Training Distributed Word Representations. ArXiv , Vol. abs\/1712.09405 (2018)."},{"key":"e_1_3_2_2_25_1","volume-title":"Passage Re-ranking with BERT. ArXiv","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira and Kyunghyun Cho. 2019. Passage Re-ranking with BERT. ArXiv , Vol. abs\/1901.04085 (2019)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.63"},{"key":"e_1_3_2_2_27_1","volume-title":"Document Expansion by Query Prediction. ArXiv","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019. Document Expansion by Query Prediction. ArXiv , Vol. abs\/1904.08375 (2019)."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3132914"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"e_1_3_2_2_30_1","unstructured":"T. Qin and T. Liu. 2013. Introducing LETOR 4.0 Datasets. ArXiv Vol. abs\/1306.2597 (2013)."},{"key":"e_1_3_2_2_31_1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, M. Matena, Yanqi Zhou, W. Li, and Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. J. Mach. Learn. Res. , Vol. 21 (2020), 140:1--140:67.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_2_32_1","unstructured":"Pranav Rajpurkar Jian Zhang Konstantin Lopyrev and Percy Liang. 2016. SQuAD: 100 000"},{"volume-title":"Machine Comprehension of Text. In Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing .","author":"Questions","key":"e_1_3_2_2_33_1","unstructured":"Questions for Machine Comprehension of Text. In Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing ."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_2_36_1","volume-title":"Overview of the TREC 2004 Robust Track.","author":"Voorhees E.","year":"2004","unstructured":"E. Voorhees. 2004. Overview of the TREC 2004 Robust Track."},{"key":"e_1_3_2_2_37_1","volume-title":"Bowman","author":"Wang Alex","year":"2018","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R. Bowman. 2018. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In BlackboxNLP@EMNLP ."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080721"},{"key":"e_1_3_2_2_39_1","volume-title":"2019 b. Simple Applications of BERT for Ad Hoc Document Retrieval. ArXiv","author":"Yang Wei","year":"2019","unstructured":"Wei Yang, Haotian Zhang, and Jimmy Lin. 2019 b. Simple Applications of BERT for Ad Hoc Document Retrieval. ArXiv , Vol. abs\/1903.10972 (2019)."},{"key":"e_1_3_2_2_40_1","unstructured":"Z. Yang Zihang Dai Yiming Yang J. Carbonell R. Salakhutdinov and Quoc V. Le. 2019 a. XLNet: Generalized Autoregressive Pretraining for Language Understanding. In NeurIPS ."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000008"},{"key":"e_1_3_2_2_42_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning, Hal Daum\u00e9 III and Aarti Singh (Eds.). 11328--11339","author":"Zhang Jingqing","year":"2020","unstructured":"Jingqing Zhang, Yao Zhao, Mohammad Saleh, and Peter Liu. 2020. PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization. In Proceedings of the 37th International Conference on Machine Learning, Hal Daum\u00e9 III and Aarti Singh (Eds.). 11328--11339."}],"event":{"name":"SIGIR '21: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Virtual Event Canada","acronym":"SIGIR '21"},"container-title":["Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3404835.3462869","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3404835.3462869","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:47:17Z","timestamp":1750193237000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3404835.3462869"}},"subtitle":["Bootstrapped Pre-training with Representative Words Prediction for Ad-hoc Retrieval"],"short-title":[],"issued":{"date-parts":[[2021,7,11]]},"references-count":42,"alternative-id":["10.1145\/3404835.3462869","10.1145\/3404835"],"URL":"https:\/\/doi.org\/10.1145\/3404835.3462869","relation":{},"subject":[],"published":{"date-parts":[[2021,7,11]]},"assertion":[{"value":"2021-07-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}