{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:33:52Z","timestamp":1771698832616,"version":"3.50.1"},"publisher-location":"Cham","reference-count":59,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030721121","type":"print"},{"value":"9783030721138","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-72113-8_23","type":"book-chapter","created":{"date-parts":[[2021,3,26]],"date-time":"2021-03-26T12:03:02Z","timestamp":1616760182000},"page":"342-358","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Evaluating Multilingual Text Encoders for Unsupervised Cross-Lingual Retrieval"],"prefix":"10.1007","author":[{"given":"Robert","family":"Litschko","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ivan","family":"Vuli\u0107","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Simone Paolo","family":"Ponzetto","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Goran","family":"Glava\u0161","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,3,27]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Artetxe, M., Labaka, G., Agirre, E.: A robust self-learning method for fully unsupervised cross-lingual mappings of word embeddings. In: Proceedings of ACL, pp. 789\u2013798 (2018)","DOI":"10.18653\/v1\/P18-1073"},{"key":"23_CR2","doi-asserted-by":"publisher","first-page":"597","DOI":"10.1162\/tacl_a_00288","volume":"7","author":"M Artetxe","year":"2019","unstructured":"Artetxe, M., Schwenk, H.: Massively multilingual sentence embeddings for zero-shot cross-lingual transfer and beyond. Trans. Assoc. Comput. Linguist. 7, 597\u2013610 (2019)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"23_CR3","unstructured":"Beltagy, I., Peters, M.E., Cohan, A.: Longformer: the long-document transformer. arXiv preprint arXiv:2004.05150 (2020)"},{"key":"23_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1007\/978-3-540-30222-3_5","volume-title":"Comparative evaluation of multilingual information access systems","author":"M Braschler","year":"2004","unstructured":"Braschler, M.: CLEF 2003 \u2013 Overview of Results. In: Peters, C., Gonzalo, J., Braschler, M., Kluck, M. (eds.) CLEF 2003. LNCS, vol. 3237, pp. 44\u201363. Springer, Heidelberg (2004). https:\/\/doi.org\/10.1007\/978-3-540-30222-3_5"},{"key":"23_CR5","unstructured":"Brown, T.B., et al.: Language models are few-shot learners. In: Proceedings of NeurIPS (2020)"},{"key":"23_CR6","unstructured":"Cao, S., Kitaev, N., Klein, D.: Multilingual alignment of contextual word representations. In: Proceedings of ICLR (2020)"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Cer, D., Diab, M., Agirre, E., Lopez-Gazpio, I., Specia, L.: SemEval-2017 task 1: semantic textual similarity multilingual and crosslingual focused evaluation. In: Proceedings of SemEval, pp. 1\u201314 (2017)","DOI":"10.18653\/v1\/S17-2001"},{"key":"23_CR8","doi-asserted-by":"crossref","unstructured":"Cer, D., et al.: Universal sentence encoder for English. In: Proceedings of EMNLP, pp. 169\u2013174 (2018)","DOI":"10.18653\/v1\/D18-2029"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Chidambaram, M., et al.: Learning cross-lingual sentence representations via a multi-task dual-encoder model. In: Proceedings of the ACL Workshop on Representation Learning for NLP, pp. 250\u2013259 (2019)","DOI":"10.18653\/v1\/W19-4330"},{"key":"23_CR10","unstructured":"Clark, K., Luong, M., Le, Q.V., Manning, C.D.: ELECTRA: pre-training text encoders as discriminators rather than generators. In: Proceedings of ICLR (2020)"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. In: Proceedings of ACL, pp. 8440\u20138451 (2020)","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"23_CR12","unstructured":"Conneau, A., Kiela, D.: SentEval: an evaluation toolkit for universal sentence representations. In: Proceedings of LREC, pp. 1699\u20131704 (2018)"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Conneau, A., Kiela, D., Schwenk, H., Barrault, L., Bordes, A.: Supervised learning of universal sentence representations from natural language inference data. In: Proceedings of EMNLP, pp. 670\u2013680 (2017)","DOI":"10.18653\/v1\/D17-1070"},{"key":"23_CR14","unstructured":"Conneau, A., Lample, G.: Cross-lingual language model pretraining. In: Proceedings of NeurIPS, pp. 7059\u20137069 (2019)"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Conneau, A., et al.: XNLI: evaluating cross-lingual sentence representations. In: Proceedings of EMNLP, pp. 2475\u20132485 (2018)","DOI":"10.18653\/v1\/D18-1269"},{"key":"23_CR16","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL, pp. 4171\u20134186 (2019)"},{"key":"23_CR17","doi-asserted-by":"crossref","unstructured":"Ethayarajh, K.: How contextual are contextualized word representations? Comparing the geometry of BERT, ELMo, and GPT-2 embeddings. In: Proceedings of EMNLP-IJCNLP, pp. 55\u201365 (2019)","DOI":"10.18653\/v1\/D19-1006"},{"key":"23_CR18","unstructured":"Feng, F., Yang, Y., Cer, D., Arivazhagan, N., Wang, W.: Language-agnostic BERT sentence embedding. arXiv preprint arXiv:2007.01852 (2020)"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Glava\u0161, G., Litschko, R., Ruder, S., Vuli\u0107, I.: How to (properly) evaluate cross-lingual word embeddings: on strong baselines, comparative analyses, and some misconceptions. In: Proceedings of ACL, pp. 710\u2013721 (2019)","DOI":"10.18653\/v1\/P19-1070"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Guo, M., et al.: Effective parallel corpus mining using bilingual sentence embeddings. In: Proceedings of WMT, pp. 165\u2013176 (2018)","DOI":"10.18653\/v1\/W18-6317"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Hoogeveen, D., Verspoor, K.M., Baldwin, T.: CQADupStack: a benchmark data set for community question-answering research. In: Proceedings of ADCS, pp. 3:1\u20133:8 (2015)","DOI":"10.1145\/2838931.2838934"},{"key":"23_CR22","unstructured":"Jiang, Z., El-Jaroudi, A., Hartmann, W., Karakos, D., Zhao, L.: Cross-lingual information retrieval with BERT. In: Proceedings of LREC, p. 26 (2020)"},{"key":"23_CR23","unstructured":"Karthikeyan, K., Wang, Z., Mayhew, S., Roth, D.: Cross-lingual ability of multilingual BERT: an empirical study. In: Proceedings of ICLR (2020)"},{"key":"23_CR24","unstructured":"Koehn, P.: Europarl: a parallel corpus for statistical machine translation. In: Proceedings of the 10th Machine Translation Summit (MT SUMMIT), pp. 79\u201386 (2005)"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Lei, T., et al.: Semi-supervised question retrieval with gated convolutions. In: Proceedings of NAACL, pp. 1279\u20131289 (2016)","DOI":"10.18653\/v1\/N16-1153"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Liang, Y., et al.: XGLUE: a new benchmark dataset for cross-lingual pre-training, understanding and generation. In: Proceedings of EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.484"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Litschko, R., Glava\u0161, G., Vuli\u0107, I., Dietz, L.: Evaluating resource-lean cross-lingual embedding models in unsupervised retrieval. In: Proceedings of SIGIR, pp. 1109\u20131112 (2019)","DOI":"10.1145\/3331184.3331324"},{"key":"23_CR28","unstructured":"Liu, Q., Kusner, M.J., Blunsom, P.: A survey on contextual embeddings. arXiv preprint arXiv:2003.07278 (2020)"},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Liu, Q., McCarthy, D., Vuli\u0107, I., Korhonen, A.: Investigating cross-lingual alignment methods for contextualized embeddings with token-level evaluation. In: Proceedings of CoNLL, pp. 33\u201343 (2019)","DOI":"10.18653\/v1\/K19-1004"},{"key":"23_CR30","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"23_CR31","doi-asserted-by":"crossref","unstructured":"MacAvaney, S., Soldaini, L., Goharian, N.: Teaching a new dog old tricks: resurrecting multilingual retrieval using zero-shot learning. In: Proceedings of ECIR, pp. 246\u2013254 (2020)","DOI":"10.1007\/978-3-030-45442-5_31"},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"MacAvaney, S., Yates, A., Cohan, A., Goharian, N.: Cedr: contextualized embeddings for document ranking. In: Proceedings of SIGIR, pp. 1101\u20131104 (2019)","DOI":"10.1145\/3331184.3331317"},{"key":"23_CR33","unstructured":"Nogueira, R., Yang, W., Cho, K., Lin, J.: Multi-stage document ranking with BERT. arXiv preprint arXiv:1910.14424 (2019)"},{"key":"23_CR34","doi-asserted-by":"crossref","unstructured":"Pires, T., Schlinger, E., Garrette, D.: How multilingual is multilingual BERT? In: Proceedings of ACL, pp. 4996\u20135001 (2019)","DOI":"10.18653\/v1\/P19-1493"},{"key":"23_CR35","doi-asserted-by":"crossref","unstructured":"Ponte, J.M., Croft, W.B.: A language modeling approach to information retrieval. In: Proceedings of SIGIR, pp. 275\u2013281 (1998)","DOI":"10.1145\/290941.291008"},{"key":"23_CR36","doi-asserted-by":"crossref","unstructured":"Ponti, E.M., Glava\u0161, G., Majewska, O., Liu, Q., Vuli\u0107, I., Korhonen, A.: XCOPA: a multilingual dataset for causal commonsense reasoning. In: Proceedings of EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.185"},{"issue":"8","key":"23_CR37","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"key":"23_CR38","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using siamese BERT-networks. In: Proceedings of EMNLP, pp. 3973\u20133983 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"23_CR39","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Making monolingual sentence embeddings multilingual using knowledge distillation. In: Proceedings of EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.365"},{"key":"23_CR40","doi-asserted-by":"publisher","first-page":"842","DOI":"10.1162\/tacl_a_00349","volume":"8","author":"A Rogers","year":"2020","unstructured":"Rogers, A., Kovaleva, O., Rumshisky, A.: A primer in BERTology: what we know about how BERT works. Trans. Assoc. Comput. Linguist. 8, 842\u2013866 (2020)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"23_CR41","doi-asserted-by":"publisher","first-page":"569","DOI":"10.1613\/jair.1.11640","volume":"65","author":"S Ruder","year":"2019","unstructured":"Ruder, S., Vuli\u0107, I., S\u00f8gaard, A.: A survey of cross-lingual word embedding models. J. Artif. Intell. Res. 65, 569\u2013631 (2019)","journal-title":"J. Artif. Intell. Res."},{"key":"23_CR42","unstructured":"Sanh, V., Debut, L., Chaumond, J., Wolf, T.: DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)"},{"key":"23_CR43","unstructured":"Smith, S.L., Turban, D.H., Hamblin, S., Hammerla, N.Y.: Offline bilingual word vectors, orthogonal transformations and the inverted softmax. In: Proceedings of ICLR (2017)"},{"key":"23_CR44","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of NeurIPS, pp. 5998\u20136008 (2017)"},{"key":"23_CR45","doi-asserted-by":"crossref","unstructured":"Vuli\u0107, I., Glavas, G., Reichart, R., Korhonen, A.: Do we really need fully unsupervised cross-lingual embeddings? In: Proceedings of EMNLP, pp. 4406\u20134417 (2019)","DOI":"10.18653\/v1\/D19-1449"},{"key":"23_CR46","doi-asserted-by":"crossref","unstructured":"Vuli\u0107, I., Moens, M.F.: Monolingual and cross-lingual information retrieval models based on (bilingual) word embeddings. In: Proceedings of SIGIR, pp. 363\u2013372 (2015)","DOI":"10.1145\/2766462.2767752"},{"key":"23_CR47","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: GLUE: a multi-task benchmark and analysis platform for natural language understanding. In: Proceedings of ICLR (2019)","DOI":"10.18653\/v1\/W18-5446"},{"key":"23_CR48","doi-asserted-by":"crossref","unstructured":"Williams, A., Nangia, N., Bowman, S.: A broad-coverage challenge corpus for sentence understanding through inference. In: Proceedings of NAACL, pp. 1112\u20131122 (2018)","DOI":"10.18653\/v1\/N18-1101"},{"key":"23_CR49","doi-asserted-by":"crossref","unstructured":"Wu, S., Dredze, M.: Beto, bentz, becas: the surprising cross-lingual effectiveness of BERT. In: Proceedings of EMNLP, pp. 833\u2013844 (2019)","DOI":"10.18653\/v1\/D19-1077"},{"key":"23_CR50","doi-asserted-by":"crossref","unstructured":"Yang, Y., et al.: Improving multilingual sentence embedding using bi-directional dual encoder with additive margin softmax. In: Proceedings of AAAI, pp. 5370\u20135378 (2019)","DOI":"10.24963\/ijcai.2019\/746"},{"key":"23_CR51","doi-asserted-by":"crossref","unstructured":"Yang, Y., et al.: Multilingual universal sentence encoder for semantic retrieval. In: Proceedings of ACL: System Demonstrations, pp. 87\u201394 (2020)","DOI":"10.18653\/v1\/2020.acl-demos.12"},{"key":"23_CR52","doi-asserted-by":"crossref","unstructured":"Yang, Y., et al.: Improving multilingual sentence embedding using bi-directional dual encoder with additive margin softmax. In: Proceedings of IJCAI, pp. 5370\u20135378 (2019)","DOI":"10.24963\/ijcai.2019\/746"},{"key":"23_CR53","doi-asserted-by":"crossref","unstructured":"Yu, P., Allan, J.: A study of neural matching models for cross-lingual IR. In: Proceedings of SIGIR, pp. 1637\u20131640 (2020)","DOI":"10.1145\/3397271.3401322"},{"key":"23_CR54","unstructured":"Zaheer, M., et al.: Big Bird: transformers for longer sequences. arXiv preprint arXiv:2007.14062 (2020)"},{"issue":"2","key":"23_CR55","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1145\/984321.984322","volume":"22","author":"C Zhai","year":"2004","unstructured":"Zhai, C., Lafferty, J.: A study of smoothing methods for language models applied to information retrieval. ACM Trans. Inf. Syst. (TOIS) 22(2), 179\u2013214 (2004)","journal-title":"ACM Trans. Inf. Syst. (TOIS)"},{"key":"23_CR56","doi-asserted-by":"crossref","unstructured":"Zhao, W., Eger, S., Bjerva, J., Augenstein, I.: Inducing language-agnostic multilingual representations. arXiv preprint arXiv:2008.09112 (2020)","DOI":"10.18653\/v1\/2021.starsem-1.22"},{"key":"23_CR57","doi-asserted-by":"crossref","unstructured":"Zhao, W., Glava\u0161, G., Peyrard, M., Gao, Y., West, R., Eger, S.: On the limitations of cross-lingual encoders as exposed by reference-free machine translation evaluation. In: Proceedings of ACL, pp. 1656\u20131671 (2020)","DOI":"10.18653\/v1\/2020.acl-main.151"},{"key":"23_CR58","unstructured":"Ziemski, M., Junczys-Dowmunt, M., Pouliquen, B.: The United Nations parallel corpus v1.0. In: Proceedings of LREC, pp. 3530\u20133534 (2016)"},{"key":"23_CR59","doi-asserted-by":"crossref","unstructured":"Zweigenbaum, P., Sharoff, S., Rapp, R.: Overview of the third BUCC shared task: spotting parallel sentences in comparable corpora. In: Proceedings of LREC (2018)","DOI":"10.18653\/v1\/W17-2512"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-72113-8_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T11:56:36Z","timestamp":1710244596000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-72113-8_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030721121","9783030721138"],"references-count":59,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-72113-8_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"27 March 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 March 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 April 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"43","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.ecir2021.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"436","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"50","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"39","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"11% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}