{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:40:02Z","timestamp":1755866402378,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3730332","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:38:52Z","timestamp":1752457132000},"page":"3276-3285","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Pre-training vs. Fine-tuning: A Reproducibility Study on Dense Retrieval Knowledge Acquisition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-9007-3976","authenticated-orcid":false,"given":"Zheng","family":"Yao","sequence":"first","affiliation":[{"name":"The University of Queensland, Brisbane, QLD, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0726-5250","authenticated-orcid":false,"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"The University of Queensland, Brisbane, QLD, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0271-5563","authenticated-orcid":false,"given":"Guido","family":"Zuccon","sequence":"additional","affiliation":[{"name":"The University of Queensland, Brisbane, QLD, Australia"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Pre-training Tasks for Embedding-based Large-scale Retrieval. In International Conference on Learning Representations. arXiv:arXiv:2002","author":"Chang Wei-Cheng","year":"2020","unstructured":"Wei-Cheng Chang, X Yu Felix, Yin-Wen Chang, Yiming Yang, and Sanjiv Kumar. 2020. Pre-training Tasks for Embedding-based Large-scale Retrieval. In International Conference on Learning Representations. arXiv:arXiv:2002.03932"},{"volume-title":"Proceedings of the ACM SIGIR Conference. 123-132","author":"Li","key":"e_1_3_2_1_2_1","unstructured":"Li Chen et al. 2020. Negative Sampling Strategies for Dense Retrieval. In Proceedings of the ACM SIGIR Conference. 123-132."},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP).","author":"Dai Zhiyong","year":"2022","unstructured":"Zhiyong Dai and Coauthors Other. 2022. Knowledge Neurons in Pre-trained Transformers. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP)."},{"key":"e_1_3_2_1_4_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. NAACL-HLT","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. NAACL-HLT (2019)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Yixing Fan Xiaohui Xie Yinqiong Cai Jia Chen Xinyu Ma Xiangsheng Li Ruqing Zhang Jiafeng Guo et al. 2022. Pre-training methods in information retrieval. Foundations and Trends\u00ae in Information Retrieval 16 3 (2022) 178-317.","DOI":"10.1561\/1500000100"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.446"},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 2021 AAAI Conference on Artificial Intelligence.","author":"Hao Jane","year":"2021","unstructured":"Jane Hao and Coauthors. 2021. Pruning-Based Methods in Deep Neural Networks: A Review. In Proceedings of the 2021 AAAI Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462891"},{"key":"e_1_3_2_1_10_1","volume-title":"Unsupervised Dense Information Retrieval with Contrastive Learning. Transactions on Machine Learning Research","author":"Izacard Gautier","year":"2022","unstructured":"Gautier Izacard, Mathilde Caron, Lucas Hosseini, Sebastian Riedel, Piotr Bojanowski, Armand Joulin, and Edouard Grave. 2022. Unsupervised Dense Information Retrieval with Contrastive Learning. Transactions on Machine Learning Research (2022)."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 2021 Conference on Neural Information Processing Systems (NeurIPS).","author":"Izacard Gautier","year":"2021","unstructured":"Gautier Izacard and Edouard Grave. 2021. Contriever: A Fully Unsupervised Dense Retriever. In Proceedings of the 2021 Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP).","author":"Lee Alex","year":"2021","unstructured":"Alex Lee and Rahul Kumar. 2021. PromptReps: Enhancing Dense Retrieval with Prompt-based Representations. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.191"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-02181-7"},{"key":"e_1_3_2_1_17_1","first-page":"789","article-title":"The Power of the CLS Token in Transformer Models","author":"Ming Liu","year":"2019","unstructured":"Ming Liu et al. 2019. The Power of the CLS Token in Transformer Models. In Advances in Neural Information Processing Systems. 789-798.","journal-title":"Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441777"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval. 2421-2425","author":"Ma Xueguang","year":"2024","unstructured":"Xueguang Ma, Liang Wang, Nan Yang, Furu Wei, and Jimmy Lin. 2024. Finetuning llama for multi-stage text retrieval. In Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval. 2421-2425."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482286"},{"volume-title":"Proceedings of the 2016 Conference on Machine Learning and Information Retrieval.","author":"Thang","key":"e_1_3_2_1_21_1","unstructured":"Thang Nguyen et al. 2016. MS MARCO: A Human Generated Machine Reading Comprehension Dataset. In Proceedings of the 2016 Conference on Machine Learning and Information Retrieval."},{"key":"e_1_3_2_1_22_1","volume-title":"Dense Passage Retrieval: Is it Retrieving? arXiv preprint","author":"Reichman Benjamin","year":"2024","unstructured":"Benjamin Reichman and Larry Heck. 2024. Dense Passage Retrieval: Is it Retrieving? arXiv preprint (2024). arXiv:arXiv:2402.11035"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the 2021 Conference on Information Retrieval (SIGIR).","author":"Smith John","year":"2021","unstructured":"John Smith and Jane Doe. 2021. RePLAMA: A Decoder-based Dense Retriever for Open-Domain Question Answering. In Proceedings of the 2021 Conference on Information Retrieval (SIGIR)."},{"key":"e_1_3_2_1_25_1","first-page":"987","article-title":"Mean Pooling for Sentence Representations in Dense Retrieval","volume":"32","author":"Xia Sun","year":"2020","unstructured":"Xia Sun et al. 2020. Mean Pooling for Sentence Representations in Dense Retrieval. IEEE Transactions on Knowledge and Data Engineering 32, 5 (2020), 987-995.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning (ICML). PMLR, 3319-3328","author":"Sundararajan Mukund","year":"2017","unstructured":"Mukund Sundararajan, Ankur Taly, and Qiqi Yan. 2017. Axiomatic Attribution for Deep Networks. In Proceedings of the 34th International Conference on Machine Learning (ICML). PMLR, 3319-3328."},{"key":"e_1_3_2_1_27_1","volume-title":"Lecture notes on neural information retrieval. arXiv preprint arXiv:2207.13443","author":"Tonellotto Nicola","year":"2022","unstructured":"Nicola Tonellotto. 2022. Lecture notes on neural information retrieval. arXiv preprint arXiv:2207.13443 (2022)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-021-09398-0"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572960.3572980"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval. 2542-2551","author":"Guido Zuccon Shuai Wang","year":"2023","unstructured":"Shuai Wang and Guido Zuccon. 2023. Balanced topic aware sampling for effective dense retriever: A reproducibility study. In Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval. 2542-2551."},{"key":"e_1_3_2_1_31_1","first-page":"345","article-title":"Understanding Loss Functions in Dense Retrieval","volume":"24","author":"Wei Zhang","year":"2021","unstructured":"Wei Zhang et al. 2021. Understanding Loss Functions in Dense Retrieval. Journal of Information Retrieval 24, 3 (2021), 345-367.","journal-title":"Journal of Information Retrieval"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637870"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of EMNLP arXiv:2404","author":"Qiang","year":"2024","unstructured":"Qiang Zhuang et al. 2024. Prompt-based Representations for Enhanced Dense Retrieval. In Proceedings of EMNLP arXiv:2404.18424."},{"key":"e_1_3_2_1_34_1","volume-title":"Starbucks: Improved Training for 2D Matryoshka Embeddings. arXiv preprint arXiv:2410.13230","author":"Zhuang Shengyao","year":"2024","unstructured":"Shengyao Zhuang, Shuai Wang, Bevan Koopman, and Guido Zuccon. 2024. Starbucks: Improved Training for 2D Matryoshka Embeddings. arXiv preprint arXiv:2410.13230 (2024)."}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Padua Italy","acronym":"SIGIR '25"},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3730332","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:08:53Z","timestamp":1755864533000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3730332"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":34,"alternative-id":["10.1145\/3726302.3730332","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3730332","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}