{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T04:14:15Z","timestamp":1743048855551,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031282379"},{"type":"electronic","value":"9783031282386"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-28238-6_27","type":"book-chapter","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T17:03:18Z","timestamp":1678986198000},"page":"377-385","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Is Cross-Modal Information Retrieval Possible Without Training?"],"prefix":"10.1007","author":[{"given":"Hyunjin","family":"Choi","sequence":"first","affiliation":[]},{"given":"Hyunjae","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Seongho","family":"Joe","sequence":"additional","affiliation":[]},{"given":"Youngjune","family":"Gwon","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,17]]},"reference":[{"key":"27_CR1","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol. 33, pp. 1877\u20131901 (2020)"},{"key":"27_CR2","doi-asserted-by":"publisher","unstructured":"Choi, H., Kim, J., Joe, S., Gwon, Y.: Evaluation of BERT and ALBERT sentence embedding performance on downstream NLP tasks. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 5482\u20135487 (2021). https:\/\/doi.org\/10.1109\/ICPR48806.2021.9412102","DOI":"10.1109\/ICPR48806.2021.9412102"},{"key":"27_CR3","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT (1), pp. 4171\u20134186. Association for Computational Linguistics (2019)"},{"key":"27_CR4","unstructured":"Dosovitskiy, A., et al.: An image is worth $$16\\times 16$$ words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Gao, T., Yao, X., Chen, D.: SimCSE: simple contrastive learning of sentence embeddings. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 6894\u20136910 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778. IEEE Computer Society (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"27_CR7","unstructured":"Huang, Z., Zeng, Z., Liu, B., Fu, D., Fu, J.: Pixel-BERT: aligning image pixels with text by deep multi-modal transformers. CoRR abs\/2004.00849 (2020)"},{"key":"27_CR8","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, Virtual Event. Proceedings of Machine Learning Research, 18\u201324 July 2021, vol. 139, pp. 4904\u20134916. PMLR (2021)"},{"key":"27_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1007\/978-3-030-58577-8_8","volume-title":"Computer Vision \u2013 ECCV 2020","author":"X Li","year":"2020","unstructured":"Li, X., et al.: Oscar: object-semantics aligned pre-training for vision-language tasks. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12375, pp. 121\u2013137. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_8"},{"key":"27_CR10","unstructured":"Liu, H., Dai, Z., So, D., Le, Q.V.: Pay attention to MLPs. In: Thirty-Fifth Conference on Neural Information Processing Systems (2021). https:\/\/openreview.net\/forum?id=KBnXrODoBW"},{"key":"27_CR11","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. CoRR abs\/1907.11692 (2019). http:\/\/arxiv.org\/abs\/1907.11692"},{"key":"27_CR12","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: ViLBERT: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Advances in Neural Information Processing Systems, vol. 32. Curran Associates, Inc. (2019). https:\/\/proceedings.neurips.cc\/paper\/2019\/hash\/c74d97b01eae257e44aa9d5bade97baf-Abstract.html"},{"key":"27_CR13","unstructured":"Qi, D., Su, L., Song, J., Cui, E., Bharti, T., Sacheti, A.: ImageBERT: cross-modal pre-training with large-scale weak-supervised image-text data. CoRR abs\/2001.07966 (2020)"},{"key":"27_CR14","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, Virtual Event. Proceedings of Machine Learning Research, 18\u201324 July 2021, vol. 139, pp. 8748\u20138763. PMLR (2021)"},{"key":"27_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/978-3-030-58598-3_10","volume-title":"Computer Vision \u2013 ECCV 2020","author":"MB Sariyildiz","year":"2020","unstructured":"Sariyildiz, M.B., Perez, J., Larlus, D.: Learning visual representations with caption annotations. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12353, pp. 153\u2013170. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58598-3_10"},{"issue":"1","key":"27_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/BF02289451","volume":"31","author":"P Sch\u00f6nemann","year":"1966","unstructured":"Sch\u00f6nemann, P.: A generalized solution of the orthogonal procrustes problem. Psychometrika 31(1), 1\u201310 (1966). https:\/\/doi.org\/10.1007\/BF02289451","journal-title":"Psychometrika"},{"key":"27_CR17","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: Bengio, Y., LeCun, Y. (eds.) 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, 7\u20139 May 2015, Conference Track Proceedings (2015). http:\/\/arxiv.org\/abs\/1409.1556"},{"key":"27_CR18","unstructured":"Su, W., et al.: VL-BERT: pre-training of generic visual-linguistic representations. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, 26\u201330 April 2020. OpenReview.net (2020)"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Sun, C., Shrivastava, A., Singh, S., Gupta, A.: Revisiting unreasonable effectiveness of data in deep learning era. CoRR abs\/1707.02968 (2017). http:\/\/arxiv.org\/abs\/1707.02968","DOI":"10.1109\/ICCV.2017.97"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"Tan, H., Bansal, M.: LXMERT: learning cross-modality encoder representations from transformers. In: Inui, K., Jiang, J., Ng, V., Wan, X. (eds.) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, EMNLP-IJCNLP 2019, Hong Kong, China, 3\u20137 November 2019, pp. 5099\u20135110. Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/D19-1514"},{"key":"27_CR21","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"27_CR22","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1162\/tacl_a_00166","volume":"2","author":"P Young","year":"2014","unstructured":"Young, P., Lai, A., Hodosh, M., Hockenmaier, J.: From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. Trans. Assoc. Comput. Linguist. 2, 67\u201378 (2014)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"27_CR23","unstructured":"Zhang, Y., Jiang, H., Miura, Y., Manning, C.D., Langlotz, C.P.: Contrastive learning of medical visual representations from paired images and text. CoRR abs\/2010.00747 (2020)"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-28238-6_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,5]],"date-time":"2024-03-05T13:48:18Z","timestamp":1709646498000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-28238-6_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031282379","9783031282386"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-28238-6_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dublin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ireland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 April 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 April 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"45","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecir2023.org\/index.html?v=1.0","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"489","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"77","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"83","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}