{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T13:08:04Z","timestamp":1779282484046,"version":"3.51.4"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032212887","type":"print"},{"value":"9783032212894","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-21289-4_5","type":"book-chapter","created":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T01:05:54Z","timestamp":1774314354000},"page":"67-82","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A simple sketch of a disposable coffee cup with a lid. The cup features a green circle in the center, possibly representing a logo or design element. The drawing is outlined in black with minimal detail. Starbucks: Improved Training for 2D Matryoshka Embeddings"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6711-0955","authenticated-orcid":false,"given":"Shengyao","family":"Zhuang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0726-5250","authenticated-orcid":false,"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0230-7969","authenticated-orcid":false,"given":"Fabio","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5577-3391","authenticated-orcid":false,"given":"Bevan","family":"Koopman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0271-5563","authenticated-orcid":false,"given":"Guido","family":"Zuccon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,3,25]]},"reference":[{"key":"5_CR1","doi-asserted-by":"publisher","unstructured":"Agirre, E., et al.: SemEval-2016 task 1: semantic textual similarity, monolingual and cross-lingual evaluation. In: Bethard, S., Carpuat, M., Cer, D., Jurgens, D., Nakov, P., Zesch, T. (eds.) Proceedings of the 10th International Workshop on Semantic Evaluation (SemEval-2016), pp. 497\u2013511. Association for Computational Linguistics, San Diego (2016). https:\/\/doi.org\/10.18653\/v1\/S16-1081. https:\/\/aclanthology.org\/S16-1081","DOI":"10.18653\/v1\/S16-1081"},{"key":"5_CR2","unstructured":"Bajaj, P., et al.: MS MARCO: a human generated machine reading comprehension dataset (2018). https:\/\/arxiv.org\/abs\/1611.09268"},{"key":"5_CR3","doi-asserted-by":"publisher","unstructured":"Bowman, S.R., Angeli, G., Potts, C., Manning, C.D.: A large annotated corpus for learning natural language inference. In: M\u00e0rquez, L., Callison-Burch, C., Su, J. (eds.) Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing. pp. 632\u2013642. Association for Computational Linguistics, Lisbon (2015). https:\/\/doi.org\/10.18653\/v1\/D15-1075. https:\/\/aclanthology.org\/D15-1075","DOI":"10.18653\/v1\/D15-1075"},{"key":"5_CR4","doi-asserted-by":"publisher","unstructured":"Cer, D., Diab, M., Agirre, E., Lopez-Gazpio, I., Specia, L.: SemEval-2017 task 1: semantic textual similarity multilingual and crosslingual focused evaluation. In: Bethard, S., Carpuat, M., Apidianaki, M., Mohammad, S.M., Cer, D., Jurgens, D. (eds.) Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017), pp. 1\u201314. Association for Computational Linguistics, Vancouver (2017).https:\/\/doi.org\/10.18653\/v1\/S17-2001. https:\/\/aclanthology.org\/S17-2001","DOI":"10.18653\/v1\/S17-2001"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Craswell, N., Mitra, B., Yilmaz, E., Campos, D.: Overview of the TREC 2020 deep learning track (2021). https:\/\/arxiv.org\/abs\/2102.07662","DOI":"10.6028\/NIST.SP.1266.deep-overview"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Craswell, N., Mitra, B., Yilmaz, E., Campos, D., Voorhees, E.M.: Overview of the TREC 2019 deep learning track (2020). https:\/\/arxiv.org\/abs\/2003.07820","DOI":"10.6028\/NIST.SP.1266.deep-overview"},{"key":"5_CR7","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, vol. 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423. https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"5_CR8","unstructured":"Devvrit, F., et al.: MatFormer: nested transformer for elastic inference (2023). https:\/\/arxiv.org\/abs\/2310.07707"},{"key":"5_CR9","doi-asserted-by":"publisher","unstructured":"Gao, L., Callan, J.: Condenser: a pre-training architecture for dense retrieval. In: Moens, M.F., Huang, X., Specia, L., Yih, S.W.t. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 981\u2013993. Association for Computational Linguistics, Online and Punta Cana (2021). https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.75. https:\/\/aclanthology.org\/2021.emnlp-main.75","DOI":"10.18653\/v1\/2021.emnlp-main.75"},{"key":"5_CR10","doi-asserted-by":"publisher","unstructured":"Gao, L., Ma, X., Lin, J., Callan, J.: Tevatron: an efficient and flexible toolkit for neural retrieval. In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR \u201923, pp. 3120\u20133124. Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3539618.3591805","DOI":"10.1145\/3539618.3591805"},{"key":"5_CR11","doi-asserted-by":"publisher","unstructured":"Gao, T., Yao, X., Chen, D.: SimCSE: simple contrastive learning of sentence embeddings. In: Moens, M.F., Huang, X., Specia, L., Yih, S.W.t. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 6894\u20136910. Association for Computational Linguistics, Online and Punta Cana (2021). https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.552. https:\/\/aclanthology.org\/2021.emnlp-main.552","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"5_CR12","doi-asserted-by":"publisher","unstructured":"Karpukhin, V., et al.: Dense passage retrieval for open-domain question answering. In: Webber, B., Cohn, T., He, Y., Liu, Y. (eds.) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 6769\u20136781. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.550. https:\/\/aclanthology.org\/2020.emnlp-main.550","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"5_CR13","doi-asserted-by":"crossref","unstructured":"Kusupati, A., et al.: Matryoshka representation learning. In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds.) Advances in Neural Information Processing Systems, vol.\u00a035, pp. 30233\u201330249. Curran Associates, Inc. (2022). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/c32319f4868da7613d78af9993100e42-Paper-Conference.pdf","DOI":"10.52202\/068431-2192"},{"key":"5_CR14","unstructured":"Lee, J., et al.: Gecko: versatile text embeddings distilled from large language models (2024). https:\/\/arxiv.org\/abs\/2403.20327"},{"key":"5_CR15","doi-asserted-by":"publisher","unstructured":"Li, X., Li, J.: AoE: angle-optimized embeddings for semantic textual similarity. In: Ku, L.W., Martins, A., Srikumar, V. (eds.) Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics, vol. 1: Long Papers, pp. 1825\u20131839. Association for Computational Linguistics, Bangkok, Thailand (2024). https:\/\/doi.org\/10.18653\/v1\/2024.acl-long.101. https:\/\/aclanthology.org\/2024.acl-long.101","DOI":"10.18653\/v1\/2024.acl-long.101"},{"key":"5_CR16","unstructured":"Li, X., Li, Z., Li, J., Xie, H., Li, Q.: ESE: espresso sentence embeddings (2024). https:\/\/arxiv.org\/abs\/2402.14776"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Lu, S., et al.: Less is more: pretrain a strong siamese encoder for dense text retrieval using a weak decoder. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 2780\u20132791 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.220"},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Marelli, M., Menini, S., Baroni, M., Bentivogli, L., Bernardi, R., Zamparelli, R.: A SICK cure for the evaluation of compositional distributional semantic models. In: Calzolari, N., et al. (eds.) Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC\u201914), pp. 216\u2013223. European Language Resources Association (ELRA), Reykjavik (2014). http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/pdf\/363_Paper.pdf","DOI":"10.63317\/39qdhuevzbqa"},{"key":"5_CR19","doi-asserted-by":"publisher","unstructured":"Muennighoff, N., Tazi, N., Magne, L., Reimers, N.: MTEB: massive text embedding benchmark. In: Vlachos, A., Augenstein, I. (eds.) Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pp. 2014\u20132037. Association for Computational Linguistics, Dubrovnik (2023). https:\/\/doi.org\/10.18653\/v1\/2023.eacl-main.148. https:\/\/aclanthology.org\/2023.eacl-main.148","DOI":"10.18653\/v1\/2023.eacl-main.148"},{"key":"5_CR20","unstructured":"Penedo, G., et al.: The fineweb datasets: decanting the web for the finest text data at scale (2024). https:\/\/arxiv.org\/abs\/2406.17557"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Rege, A., et al.: AdANNS: a framework for adaptive semantic search. In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems, vol.\u00a036, pp. 76311\u201376335. Curran Associates, Inc. (2023). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/f062da1973ac9ac61fc6d44dd7fa309f-Paper-Conference.pdf","DOI":"10.52202\/075280-3336"},{"key":"5_CR22","doi-asserted-by":"publisher","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using siamese BERT-networks. In: Inui, K., Jiang, J., Ng, V., Wan, X. (eds.) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 3982\u20133992. Association for Computational Linguistics, Hong Kong (2019) https:\/\/doi.org\/10.18653\/v1\/D19-1410. https:\/\/aclanthology.org\/D19-1410","DOI":"10.18653\/v1\/D19-1410"},{"key":"5_CR23","unstructured":"Thakur, N., Reimers, N., R\u00fcckl\u00e9, A., Srivastava, A., Gurevych, I.: BEIR: a heterogeneous benchmark for zero-shot evaluation of information retrieval models. In: Vanschoren, J., Yeung, S. (eds.) Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks, vol.\u00a01 (2021). https:\/\/datasets-benchmarks-proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/65b9eea6e1cc6bb9f0cd2a47751a186f-Paper-round2.pdf"},{"key":"5_CR24","unstructured":"Wang, L., et al.: Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533 (2022)"},{"key":"5_CR25","doi-asserted-by":"publisher","unstructured":"Wang, L., et al.: SimLM: pre-training with representation bottleneck for dense passage retrieval. In: Rogers, A., Boyd-Graber, J., Okazaki, N. (eds.) Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics, vol. 1: Long Papers, pp. 2244\u20132258. Association for Computational Linguistics, Toronto (2023). https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.125. https:\/\/aclanthology.org\/2023.acl-long.125","DOI":"10.18653\/v1\/2023.acl-long.125"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Wang, S., Zhuang, S., Koopman, B., Zuccon, G.: 2d matryoshka training for information retrieval. arXiv preprint arXiv:2411.17299 (2024)","DOI":"10.1145\/3726302.3730330"},{"key":"5_CR27","doi-asserted-by":"publisher","unstructured":"Williams, A., Nangia, N., Bowman, S.: A broad-coverage challenge corpus for sentence understanding through inference. In: Walker, M., Ji, H., Stent, A. (eds.) Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, vol. 1 (Long Papers), pp. 1112\u20131122. Association for Computational Linguistics, New Orleans (2018). https:\/\/doi.org\/10.18653\/v1\/N18-1101. https:\/\/aclanthology.org\/N18-1101","DOI":"10.18653\/v1\/N18-1101"},{"key":"5_CR28","doi-asserted-by":"publisher","unstructured":"Wolf, T., et al.: Transformers: state-of-the-art natural language processing. In: Liu, Q., Schlangen, D. (eds.) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 38\u201345. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-demos.6. https:\/\/aclanthology.org\/2020.emnlp-demos.6","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"5_CR29","doi-asserted-by":"publisher","unstructured":"Xiao, S., Liu, Z., Shao, Y., Cao, Z.: RetroMAE: pre-training retrieval-oriented language models via masked auto-encoder. In: Goldberg, Y., Kozareva, Z., Zhang, Y. (eds.) Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 538\u2013548. Association for Computational Linguistics, Abu Dhabi (2022). https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.35. https:\/\/aclanthology.org\/2022.emnlp-main.35","DOI":"10.18653\/v1\/2022.emnlp-main.35"},{"key":"5_CR30","doi-asserted-by":"publisher","unstructured":"Zhao, W.X., Liu, J., Ren, R., Wen, J.R.: Dense text retrieval based on pretrained language models: a survey. ACM Trans. Inf. Syst. 42(4) (2024). https:\/\/doi.org\/10.1145\/3637870","DOI":"10.1145\/3637870"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Zhuang, S., Ma, X., Koopman, B., Lin, J., Zuccon, G.: PromptReps: prompting large language models to generate dense and sparse representations for zero-shot document retrieval (2024). https:\/\/arxiv.org\/abs\/2404.18424","DOI":"10.18653\/v1\/2024.emnlp-main.250"},{"key":"5_CR32","doi-asserted-by":"publisher","unstructured":"Zhuang, S., et al.: Typos-aware bottlenecked pre-training for robust dense retrieval. In: Proceedings of the Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region, SIGIR-AP \u201923, pp. 212\u2013222. Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3624918.3625324","DOI":"10.1145\/3624918.3625324"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-21289-4_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T12:12:56Z","timestamp":1779279176000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-21289-4_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032212887","9783032212894"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-21289-4_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"25 March 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Delft","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 March 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 April 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"48","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecir2026.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}