{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T10:20:32Z","timestamp":1756894832886,"version":"3.40.3"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031206429"},{"type":"electronic","value":"9783031206436"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20643-6_11","type":"book-chapter","created":{"date-parts":[[2022,10,31]],"date-time":"2022-10-31T13:18:09Z","timestamp":1667222289000},"page":"147-161","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["How Train\u2013Test Leakage Affects Zero-Shot Retrieval"],"prefix":"10.1007","author":[{"given":"Maik","family":"Fr\u00f6be","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christopher","family":"Akiki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin","family":"Potthast","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthias","family":"Hagen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,11,1]]},"reference":[{"key":"11_CR1","doi-asserted-by":"crossref","unstructured":"Allan, J., Harman, D., Kanoulas, E., Li, D., Gysel, C., Voorhees, E.: TREC 2017 common core track overview. In: Proceedings of TREC 2017, vol. 500\u2013324. NIST (2017)","DOI":"10.6028\/NIST.SP.500-324.core-overview"},{"issue":"3","key":"11_CR2","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1504\/IJSN.2015.071829","volume":"10","author":"G Ateniese","year":"2015","unstructured":"Ateniese, G., Mancini, L., Spognardi, A., Villani, A., Vitali, D., Felici, G.: Hacking smart machines with smarter ones: how to extract meaningful data from machine learning classifiers. Int. J. Secur. Netw. 10(3), 137\u2013150 (2015)","journal-title":"Int. J. Secur. Netw."},{"key":"11_CR3","doi-asserted-by":"crossref","unstructured":"Benham, R., et al.: RMIT at the 2017 TREC CORE track. In: Proceedings of TREC 2017, NIST Special Publication, vol. 500-324. NIST (2017)","DOI":"10.6028\/NIST.SP.500-324.core-RMIT"},{"key":"11_CR4","doi-asserted-by":"crossref","unstructured":"Benham, R., et al.: RMIT at the 2018 TREC CORE track. In: Proceedings of TREC 2018, NIST Special Publication, vol. 500-331. NIST (2018)","DOI":"10.6028\/NIST.SP.500-331.core-RMIT"},{"key":"11_CR5","unstructured":"Berthelot, D., Raffel, C., Roy, A., Goodfellow, I.: Understanding and improving interpolation in autoencoders via an adversarial regularizer. In: Proceedings of ICLR 2019. OpenReview.net (2019)"},{"key":"11_CR6","unstructured":"Chen, C., Wu, B., Qiu, M., Wang, L., Zhou, J.: A comprehensive analysis of information leakage in deep transfer learning. CoRR abs\/2009.01989 (2020)"},{"key":"11_CR7","unstructured":"Chollet, F.: Deep Learning with Python. Simon and Schuster (2021)"},{"key":"11_CR8","doi-asserted-by":"crossref","unstructured":"Craswell, N., Campos, D., Mitra, B., Yilmaz, E., Billerbeck, B.: ORCAS: 20 million clicked query-document pairs for analyzing search. In: Proceedings of CIKM 2020, pp. 2983\u20132989. ACM (2020)","DOI":"10.1145\/3340531.3412779"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Craswell, N., Mitra, B., Yilmaz, E., Campos, D.: Overview of the TREC 2021 deep learning track. In: Voorhees, E.M., Ellis, A. (eds.) Notebook. NIST (2021)","DOI":"10.6028\/NIST.SP.500-335.deep-overview"},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Craswell, N., Mitra, B., Yilmaz, E., Campos, D., Voorhees, E.: Overview of the TREC 2019 deep learning track. In: Proceedings of TREC 2019, NIST Special Publication. NIST (2019)","DOI":"10.6028\/NIST.SP.1266.deep-overview"},{"key":"11_CR11","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL 2019, Minneapolis, Minnesota, pp. 4171\u20134186. Association for Computational Linguistics (2019)"},{"key":"11_CR12","unstructured":"Dolan, W.B., Brockett, C.: Automatically constructing a corpus of sentential paraphrases. In: Proceedings of the Third International Workshop on Paraphrasing (IWP 2005) (2005)"},{"key":"11_CR13","doi-asserted-by":"crossref","unstructured":"Fan, A., Jernite, Y., Perez, E., Grangier, D., Weston, J., Auli, M.: ELI5: long form question answering. In: Proceedings of ACL 2019, pp. 3558\u20133567. ACL (2019)","DOI":"10.18653\/v1\/P19-1346"},{"key":"11_CR14","doi-asserted-by":"crossref","unstructured":"Fan, Y., Guo, J., Lan, Y., Xu, J., Zhai, C., Cheng, X.: Modeling diverse relevance patterns in ad-hoc retrieval. In: Proceedings of SIGIR 2018, pp. 375\u2013384. ACM (2018)","DOI":"10.1145\/3209978.3209980"},{"key":"11_CR15","doi-asserted-by":"crossref","unstructured":"Feldman, V.: Does learning require memorization? A short tale about a long tail. In: Proceedings of STOC 2020, pp. 954\u2013959. ACM (2020)","DOI":"10.1145\/3357713.3384290"},{"key":"11_CR16","unstructured":"Feldman, V., Zhang, C.: What neural networks memorize and why: discovering the long tail via influence estimation. In: Proceedings of NeurIPS 2020 (2020)"},{"key":"11_CR17","doi-asserted-by":"crossref","unstructured":"Fredrikson, M., Jha, S., Ristenpart, T.: Model inversion attacks that exploit confidence information and basic countermeasures. In: Proceedings of CCS 2015, pp. 1322\u20131333. ACM (2015)","DOI":"10.1145\/2810103.2813677"},{"issue":"3","key":"11_CR18","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1145\/3190580.3190586","volume":"51","author":"N Fuhr","year":"2017","unstructured":"Fuhr, N.: Some common mistakes in IR evaluation, and how they can be avoided. SIGIR Forum 51(3), 32\u201341 (2017)","journal-title":"SIGIR Forum"},{"issue":"9","key":"11_CR19","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He, H., Garcia, E.: Learning from imbalanced data. IEEE Trans. Knowl. Data Eng. 21(9), 1263\u20131284 (2009)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"11_CR20","unstructured":"Hofst\u00e4tter, S., Zlabinger, M., Hanbury, A.: Interpretable & time-budget-constrained contextualization for re-ranking. In: Proceedings of ECAI 2020, Frontiers in Artificial Intelligence and Applications, vol. 325, pp. 513\u2013520. IOS Press (2020)"},{"key":"11_CR21","doi-asserted-by":"crossref","unstructured":"Hui, K., Yates, A., Berberich, K., Melo, G.: PACRR: a position-aware neural IR model for relevance matching. In: Proceedings of EMNLP 2017, pp. 1049\u20131058. ACL (2017)","DOI":"10.18653\/v1\/D17-1110"},{"issue":"7","key":"11_CR22","doi-asserted-by":"publisher","first-page":"1358","DOI":"10.1002\/asi.21071","volume":"60","author":"B Jansen","year":"2009","unstructured":"Jansen, B., Booth, D., Spink, A.: Patterns of query reformulation during web searching. J. Assoc. Inf. Sci. Technol. 60(7), 1358\u20131371 (2009)","journal-title":"J. Assoc. Inf. Sci. Technol."},{"issue":"3","key":"11_CR23","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2021","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2021)","journal-title":"IEEE Trans. Big Data"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Krishna, K., Roy, A., Iyyer, M.: Hurdles to progress in long-form question answering. In: Proceedings of NAACL 2021, pp. 4940\u20134957. ACL (2021)","DOI":"10.18653\/v1\/2021.naacl-main.393"},{"key":"11_CR25","unstructured":"Li, C., Yates, A., MacAvaney, S., He, B., Sun, Y.: PARADE: passage representation aggregation for document reranking. CoRR abs\/2008.09093 (2020)"},{"key":"11_CR26","doi-asserted-by":"crossref","unstructured":"Lin, J., Ma, X., Lin, S., Yang, J., Pradeep, R., Nogueira, R.: Pyserini: a Python toolkit for reproducible information retrieval research with sparse and dense representations. In: Proceedings of SIGIR 2021, pp. 2356\u20132362. ACM (2021)","DOI":"10.1145\/3404835.3463238"},{"key":"11_CR27","doi-asserted-by":"crossref","unstructured":"Lin, J., Yang, P.: The impact of score ties on repeatability in document ranking. In: Proceedings of SIGIR 2019, pp. 1125\u20131128. ACM (2019)","DOI":"10.1145\/3331184.3331339"},{"key":"11_CR28","unstructured":"Lin, S., Yang, J., Lin, J.: Distilling dense representations for ranking using tightly-coupled teachers. CoRR abs\/2010.11386 (2020)"},{"key":"11_CR29","doi-asserted-by":"crossref","unstructured":"Linjordet, T., Balog, K.: Sanitizing synthetic training data generation for question answering over knowledge graphs. In: Proceedings of ICTIR 2020, pp. 121\u2013128. ACM (2020)","DOI":"10.1145\/3409256.3409836"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"MacAvaney, S., Yates, A., Cohan, A., Goharian, N.: CEDR: contextualized embeddings for document ranking. In: Proceedings of SIGIR 2019, pp. 1101\u20131104. ACM (2019)","DOI":"10.1145\/3331184.3331317"},{"key":"11_CR31","doi-asserted-by":"crossref","unstructured":"MacAvaney, S., Yates, A., Feldman, S., Downey, D., Cohan, A., Goharian, N.: Simplified data wrangling with ir_datasets. In: Proceedings of SIGIR 2021, pp. 2429\u20132436. ACM (2021)","DOI":"10.1145\/3404835.3463254"},{"key":"11_CR32","doi-asserted-by":"crossref","unstructured":"Macdonald, C., Tonellotto, N., MacAvaney, S., Ounis, I.: PyTerrier: declarative experimentation in Python from BM25 to dense retrieval. In: Proceedings of CIKM 2021, pp. 4526\u20134533. ACM (2021)","DOI":"10.1145\/3459637.3482013"},{"key":"11_CR33","doi-asserted-by":"crossref","unstructured":"Mitra, B., Diaz, F., Craswell, N.: Learning to match using local and distributed representations of text for web search. In: Proceedings of WWW 2017, pp. 1291\u20131299. ACM (2017)","DOI":"10.1145\/3038912.3052579"},{"key":"11_CR34","doi-asserted-by":"crossref","unstructured":"Mokrii, I., Boytsov, L., Braslavski, P.: A systematic evaluation of transfer learning and pseudo-labeling with BERT-based ranking models. In: Proceedings of SIGIR 2021, pp. 2081\u20132085. ACM (2021)","DOI":"10.1145\/3404835.3463093"},{"key":"11_CR35","doi-asserted-by":"crossref","unstructured":"Nasr, M., Shokri, R., Houmansadr, A.: Comprehensive privacy analysis of deep learning: passive and active white-box inference attacks against centralized and federated learning. In: Proceedings of SP 2019, pp. 739\u2013753. IEEE (2019)","DOI":"10.1109\/SP.2019.00065"},{"key":"11_CR36","doi-asserted-by":"crossref","unstructured":"Nogueira, R., Jiang, Z., Pradeep, R., Lin, J.: Document ranking with a pretrained sequence-to-sequence model. In: Findings of EMNLP 2020, vol. EMNLP 2020, pp. 708\u2013718. ACL (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.63"},{"key":"11_CR37","unstructured":"Nogueira, R., Yang, W., Cho, K., Lin, J.: Multi-stage document ranking with BERT. CoRR abs\/1910.14424 (2019)"},{"key":"11_CR38","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using Siamese BERT-networks. In: Proceedings of EMNLP 2019, pp. 3980\u20133990. ACL (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"11_CR39","unstructured":"Sandhaus, E.: The New York times annotated corpus. Linguist. Data Consortium Philadelphia 6(12), e26752 (2008)"},{"key":"11_CR40","unstructured":"Sharma, L., Graesser, L., Nangia, N., Evci, U.: Natural language understanding with the Quora question pairs dataset. CoRR abs\/1907.01041 (2019)"},{"key":"11_CR41","doi-asserted-by":"crossref","unstructured":"Shokri, R., Stronati, M., Song, C., Shmatikov, V.: Membership inference attacks against machine learning models. In: Proceedings of SP 2017, pp. 3\u201318. IEEE (2017)","DOI":"10.1109\/SP.2017.41"},{"issue":"1","key":"11_CR42","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/1067268.1067272","volume":"39","author":"E Voorhees","year":"2005","unstructured":"Voorhees, E.: The TREC robust retrieval track. SIGIR Forum 39(1), 11\u201320 (2005)","journal-title":"SIGIR Forum"},{"key":"11_CR43","doi-asserted-by":"crossref","unstructured":"Wahle, J.P., Ruas, T., Meuschke, N., Gipp, B.: Are neural language models good plagiarists? A benchmark for neural paraphrase detection. In: Proceedings of JCDL 2021, pp. 226\u2013229 (2021)","DOI":"10.1109\/JCDL52503.2021.00065"},{"key":"11_CR44","doi-asserted-by":"crossref","unstructured":"Xiong, C., Dai, Z., Callan, J., Liu, Z., Power, R.: End-to-end neural ad-hoc ranking with kernel pooling. In: Proceedings of SIGIR 2017, pp. 55\u201364. ACM (2017)","DOI":"10.1145\/3077136.3080809"},{"key":"11_CR45","doi-asserted-by":"crossref","unstructured":"Yates, A., Arora, S., Zhang, X., Yang, W., Jose, K., Lin, J.: Capreolus: a toolkit for end-to-end neural ad hoc retrieval. In: Proceedings of WSDM 2020, pp. 861\u2013864. ACM (2020)","DOI":"10.1145\/3336191.3371868"},{"key":"11_CR46","unstructured":"Zhan, J., Xie, X., Mao, J., Liu, Y., Zhang, M., Ma, S.: Evaluating extrapolation performance of dense retrieval. CoRR abs\/2204.11447 (2022)"},{"key":"11_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, X., Yates, A., Lin, J.: A little bit is worse than none: ranking with limited training data. In: Proceedings of SustaiNLP 2020, pp. 107\u2013112. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.sustainlp-1.14"},{"key":"11_CR48","doi-asserted-by":"crossref","unstructured":"Zobel, J., Rashidi, L.: Corpus bootstrapping for assessment of the properties of effectiveness measures. In: Proceedings of CIKM 2020, pp. 1933\u20131952. ACM (2020)","DOI":"10.1145\/3340531.3411998"}],"container-title":["Lecture Notes in Computer Science","String Processing and Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20643-6_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T22:53:45Z","timestamp":1728255225000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20643-6_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031206429","9783031206436"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20643-6_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"1 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPIRE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on String Processing and Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Concepci\u00f3n","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chile","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 November 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 November 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"spire2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/spire2022.inf.udec.cl\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"43","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"53% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.62","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}