{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T14:51:14Z","timestamp":1780066274212,"version":"3.54.0"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031560262","type":"print"},{"value":"9783031560279","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-56027-9_26","type":"book-chapter","created":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T07:02:49Z","timestamp":1710831769000},"page":"421-438","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Cross-Modal Retrieval for\u00a0Knowledge-Based Visual Question Answering"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0882-8684","authenticated-orcid":false,"given":"Paul","family":"Lerner","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0755-2361","authenticated-orcid":false,"given":"Olivier","family":"Ferret","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7249-8715","authenticated-orcid":false,"given":"Camille","family":"Guinaudeau","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,3,20]]},"reference":[{"key":"26_CR1","doi-asserted-by":"publisher","unstructured":"Adjali, O., Grimal, P., Ferret, O., Ghannay, S., Le Borgne, H.: Explicit knowledge integration for knowledge-aware visual question answering about named entities. In: Proceedings of the 2023 ACM International Conference on Multimedia Retrieval, pp. 29\u201338. ICMR \u201923, Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3591106.3592227","DOI":"10.1145\/3591106.3592227"},{"key":"26_CR2","doi-asserted-by":"publisher","unstructured":"Alberts, H., et al.: VisualSem: a high-quality knowledge graph for vision and language. In: Proceedings of the 1st Workshop on Multilingual Representation Learning, pp. 138\u2013152. Association for Computational Linguistics, Punta Cana, Dominican Republic (Nov 2021). https:\/\/doi.org\/10.18653\/v1\/2021.mrl-1.13, https:\/\/aclanthology.org\/2021.mrl-1.13","DOI":"10.18653\/v1\/2021.mrl-1.13"},{"key":"26_CR3","doi-asserted-by":"publisher","unstructured":"Antol, S., Agrawal, A., Lu, J., Mitchell, M., Batra, D., Zitnick, C.L., Parikh, D.: VQA: Visual Question Answering. In: 2015 IEEE International Conference on Computer Vision (ICCV), pp. 2425\u20132433. IEEE, Santiago, Chile (Dec 2015). https:\/\/doi.org\/10.1109\/ICCV.2015.279, http:\/\/ieeexplore.ieee.org\/document\/7410636\/","DOI":"10.1109\/ICCV.2015.279"},{"key":"26_CR4","doi-asserted-by":"publisher","unstructured":"Baltru\u0161aitis, T., Ahuja, C., Morency, L.P.: Multimodal machine learning: a survey and taxonomy. IEEE Trans. Pattern Anal. Mach. Intell. 41(2), 423\u2013443 (2019). https:\/\/doi.org\/10.1109\/TPAMI.2018.2798607, conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence","DOI":"10.1109\/TPAMI.2018.2798607"},{"key":"26_CR5","doi-asserted-by":"publisher","unstructured":"Bassani, E.: ranx: a blazing-fast python library for ranking evaluation and comparison. In: Hagen, M., Verberne, S., Macdonald, C., Seifert, C., Balog, K., N\u00f8rv\u00e5g, K., Setty, V. (eds.) Advances in Information Retrieval, pp. 259\u2013264. Lecture Notes in Computer Science, Springer International Publishing, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-030-99739-7_30","DOI":"10.1007\/978-3-030-99739-7_30"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Bokhari, M.U., Hasan, F.: Multimodal information retrieval: challenges and future trends. Int. J. Comput. Appl. 74(14) (2013), publisher: Foundation of Computer Science","DOI":"10.5120\/12951-9967"},{"key":"26_CR7","doi-asserted-by":"publisher","unstructured":"Bulian, J., Buck, C., Gajewski, W., B\u00f6rschinger, B., Schuster, T.: Tomayto, tomahto. beyond token-level answer equivalence for question answering evaluation. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 291\u2013305. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates (Dec 2022). https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.20, https:\/\/aclanthology.org\/2022.emnlp-main.20","DOI":"10.18653\/v1\/2022.emnlp-main.20"},{"key":"26_CR8","doi-asserted-by":"publisher","unstructured":"Chen, Y., et al.: Can pre-trained vision and language models answer visual information-seeking questions? (Feb 2023). https:\/\/doi.org\/10.48550\/arXiv.2302.11713, http:\/\/arxiv.org\/abs\/2302.11713, arXiv:2302.11713 [cs]","DOI":"10.48550\/arXiv.2302.11713"},{"issue":"240","key":"26_CR9","first-page":"1","volume":"24","author":"A Chowdhery","year":"2023","unstructured":"Chowdhery, A., et al.: Palm: scaling language modeling with pathways. J. Mach. Learn. Res. 24(240), 1\u2013113 (2023)","journal-title":"J. Mach. Learn. Res."},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"Couairon, G., Douze, M., Cord, M., Schwenk, H.: Embedding arithmetic of multimodal queries for image retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pp. 4950\u20134958 (June 2022)","DOI":"10.1109\/CVPRW56347.2022.00542"},{"key":"26_CR11","doi-asserted-by":"publisher","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (Jun 2009). https:\/\/doi.org\/10.1109\/CVPR.2009.5206848, iSSN: 1063-6919","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Xue, N., Zafeiriou, S.: Arcface: additive angular margin loss for deep face recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (June 2019). https:\/\/openaccess.thecvf.com\/content_CVPR_2019\/html\/Deng_ArcFace_Additive_Angular_Margin_Loss_for_Deep_Face_Recognition_CVPR_2019_paper.html","DOI":"10.1109\/CVPR.2019.00482"},{"key":"26_CR13","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota (Jun 2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423, https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"26_CR14","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"26_CR15","unstructured":"Fisher, R.A.: The design of experiments. The design of experiments. (2nd Ed) (1937). https:\/\/www.cabdirect.org\/cabdirect\/abstract\/19371601600, publisher: Oliver & Boyd, Edinburgh & London"},{"key":"26_CR16","doi-asserted-by":"publisher","unstructured":"Gan, Z., Li, L., Li, C., Wang, L., Liu, Z., Gao, J.: Vision-language pre-training: basics, recent advances, and future trends. Found. Trends. Comput. Graph. Vis. 14(3\u20134), 163\u2013352 (dec 2022). https:\/\/doi.org\/10.1561\/0600000105","DOI":"10.1561\/0600000105"},{"key":"26_CR17","doi-asserted-by":"publisher","unstructured":"Garcia-Olano, D., Onoe, Y., Ghosh, J.: Improving and diagnosing knowledge-based visual question answering via entity enhanced knowledge injection. In: Companion Proceedings of the Web Conference 2022, pp. 705\u2013715. WWW \u201922, Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3487553.3524648","DOI":"10.1145\/3487553.3524648"},{"key":"26_CR18","doi-asserted-by":"crossref","unstructured":"Gard\u00e8res, F., Ziaeefard, M.: ConceptBert: Concept-Aware Representation for Visual Question Answering. Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 10 (2020). https:\/\/aclanthology.org\/2020.findings-emnlp.44\/","DOI":"10.18653\/v1\/2020.findings-emnlp.44"},{"key":"26_CR19","doi-asserted-by":"crossref","unstructured":"Gui, L., Wang, B., Huang, Q., Hauptmann, A., Bisk, Y., Gao, J.: KAT: a knowledge augmented transformer for vision-and-language. In: Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 956\u2013968. Association for Computational Linguistics, Seattle, United States (Jul 2022), https:\/\/aclanthology.org\/2022.naacl-main.70","DOI":"10.18653\/v1\/2022.naacl-main.70"},{"key":"26_CR20","doi-asserted-by":"publisher","unstructured":"Guo, W., Wang, J., Wang, S.: Deep multimodal representation learning: a survey. IEEE Access 7, 63373\u201363394 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2916887, conference Name: IEEE Access","DOI":"10.1109\/ACCESS.2019.2916887"},{"key":"26_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference On Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016), https:\/\/openaccess.thecvf.com\/content_cvpr_2016\/papers\/He_Deep_Residual_Learning_CVPR_2016_paper.pdf","DOI":"10.1109\/CVPR.2016.90"},{"key":"26_CR22","doi-asserted-by":"publisher","unstructured":"Heo, Y.J., Kim, E.S., Choi, W.S., Zhang, B.T.: Hypergraph Transformer: Weakly-supervised multi-hop reasoning for knowledge-based visual question answering. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 373\u2013390. Association for Computational Linguistics, Dublin, Ireland (May 2022). https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.29, https:\/\/aclanthology.org\/2022.acl-long.29","DOI":"10.18653\/v1\/2022.acl-long.29"},{"key":"26_CR23","doi-asserted-by":"crossref","unstructured":"Hu, Y., Hua, H., Yang, Z., Shi, W., Smith, N.A., Luo, J.: Promptcap: prompt-guided task-aware image captioning (2023)","DOI":"10.1109\/ICCV51070.2023.00277"},{"key":"26_CR24","unstructured":"Hu, Z., et al.: AVIS: Autonomous Visual Information Seeking with Large Language Models (Jun 2023). http:\/\/arxiv.org\/abs\/2306.08129, arXiv:2306.08129 [cs]"},{"key":"26_CR25","doi-asserted-by":"publisher","unstructured":"Izacard, G., Grave, E.: leveraging passage retrieval with generative models for open domain question answering. In: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pp. 874\u2013880. Association for Computational Linguistics, Online (Apr 2021). https:\/\/doi.org\/10.18653\/v1\/2021.eacl-main.74, https:\/\/aclanthology.org\/2021.eacl-main.74","DOI":"10.18653\/v1\/2021.eacl-main.74"},{"key":"26_CR26","doi-asserted-by":"publisher","unstructured":"Ji, Z., et al.: Survey of hallucination in natural language generation. ACM Comput. Surv. 55(12), 248:1\u2013248:38 (Mar 2023). https:\/\/doi.org\/10.1145\/3571730, https:\/\/dl.acm.org\/doi\/10.1145\/3571730","DOI":"10.1145\/3571730"},{"issue":"3","key":"26_CR27","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019). https:\/\/doi.org\/10.1109\/TBDATA.2019.2921572","journal-title":"IEEE Trans. Big Data"},{"key":"26_CR28","doi-asserted-by":"crossref","unstructured":"Karpukhin, V., et al.: Dense passage retrieval for open-domain question answering. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP). pp. 6769\u20136781. Association for Computational Linguistics, Online (Nov 2020), https:\/\/www.aclweb.org\/anthology\/2020.emnlp-main.550","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"26_CR29","doi-asserted-by":"publisher","unstructured":"Khan, S., Naseer, M., Hayat, M., Zamir, S.W., Khan, F.S., Shah, M.: Transformers in vision: a survey. ACM Comput. Surv. 54(10s) (sep 2022). https:\/\/doi.org\/10.1145\/3505244","DOI":"10.1145\/3505244"},{"key":"26_CR30","doi-asserted-by":"publisher","unstructured":"Lerner, P., Ferret, O., Guinaudeau, C.: Multimodal inverse cloze task for knowledge-based visual question answering. In: Advances in Information Retrieval (ECIR 2023), pp. 569\u2013587. Springer Nature Switzerland, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-28244-7_36","DOI":"10.1007\/978-3-031-28244-7_36"},{"key":"26_CR31","doi-asserted-by":"publisher","unstructured":"Lerner, P., et al.: ViQuAE, a dataset for knowledge-based visual question answering about named entities. In: Proceedings of The 45th International ACM SIGIR Conference on Research and Development in Information Retrieval. SIGIR \u201922, Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3477495.3531753, https:\/\/hal.archives-ouvertes.fr\/hal-03650618","DOI":"10.1145\/3477495.3531753"},{"key":"26_CR32","unstructured":"Lhoest, Q.,et al.: Datasets: a community library for natural language processing. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. pp. 175\u2013184. Association for Computational Linguistics, Online and Punta Cana, Dominican Republic (Nov 2021), https:\/\/aclanthology.org\/2021.emnlp-demo.21"},{"key":"26_CR33","doi-asserted-by":"publisher","unstructured":"Li, L., et al.: M$$^3$$IT: A Large-Scale Dataset towards Multi-Modal Multilingual Instruction Tuning (Jun 2023). https:\/\/doi.org\/10.48550\/arXiv.2306.04387, http:\/\/arxiv.org\/abs\/2306.04387, arXiv:2306.04387 [cs]","DOI":"10.48550\/arXiv.2306.04387"},{"key":"26_CR34","unstructured":"Lin, C.Y.: Rouge: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381 (2004)"},{"key":"26_CR35","unstructured":"Liu, Z., Xiong, C., Lv, Y., Liu, Z., Yu, G.: Universal vision-language dense retrieval: learning a unified representation space for multi-modal retrieval. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=PQOlkgsBsik"},{"key":"26_CR36","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019), https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"26_CR37","doi-asserted-by":"crossref","unstructured":"Marino, K., Rastegari, M., Farhadi, A., Mottaghi, R.: OK-VQA: a visual question answering benchmark requiring external knowledge. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3195\u20133204 (2019), https:\/\/ieeexplore.ieee.org\/document\/8953725\/","DOI":"10.1109\/CVPR.2019.00331"},{"key":"26_CR38","doi-asserted-by":"crossref","unstructured":"Mensink, T., et al.: Encyclopedic vqa: Visual questions about detailed properties of fine-grained categories. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3113\u20133124 (October 2023)","DOI":"10.1109\/ICCV51070.2023.00289"},{"key":"26_CR39","unstructured":"Paszke, A., et al.: PyTorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems 32 (2019). https:\/\/papers.nips.cc\/paper\/2019\/hash\/bdbca288fee7f92f2bfa9f7012727740-Abstract.html"},{"key":"26_CR40","doi-asserted-by":"crossref","unstructured":"Pezeshkpour, P., Chen, L., Singh, S.: Embedding multimodal relational data for knowledge base completion. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 3208\u20133218 (2018)","DOI":"10.18653\/v1\/D18-1359"},{"key":"26_CR41","unstructured":"Radford, A., , et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"26_CR42","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1007\/978-3-031-20074-8_9","volume-title":"Computer Vision \u2013 ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part VIII","author":"D Schwenk","year":"2022","unstructured":"Schwenk, D., Khandelwal, A., Clark, C., Marino, K., Mottaghi, R.: A-OKVQA: a benchmark for\u00a0visual question answering using world knowledge. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part VIII, pp. 146\u2013162. Springer Nature Switzerland, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20074-8_9"},{"key":"26_CR43","doi-asserted-by":"crossref","unstructured":"Shah, S., Mishra, A., Yadati, N., Talukdar, P.P.: KVQA: knowledge-aware visual question answering. In: Proceedings of the AAAI Conference on Artificial Intelligence. 33, pp. 8876\u20138884, 2019. https:\/\/144.208.67.177\/ojs\/index.php\/AAAI\/article\/view\/4915","DOI":"10.1609\/aaai.v33i01.33018876"},{"key":"26_CR44","doi-asserted-by":"publisher","unstructured":"Smucker, M.D., Allan, J., Carterette, B.: A comparison of statistical significance tests for information retrieval evaluation. In: Proceedings of the sixteenth ACM conference on Conference on information and knowledge management, pp. 623\u2013632. CIKM \u201907, Association for Computing Machinery, New York, NY, USA (Nov 2007). https:\/\/doi.org\/10.1145\/1321440.1321528","DOI":"10.1145\/1321440.1321528"},{"key":"26_CR45","doi-asserted-by":"publisher","unstructured":"Srinivasan, K., Raman, K., Chen, J., Bendersky, M., Najork, M.: Wit: Wikipedia-based image text dataset for multimodal multilingual machine learning. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2443\u20132449. SIGIR \u201921, Association for Computing Machinery, New York, NY, USA (2021). https:\/\/doi.org\/10.1145\/3404835.3463257","DOI":"10.1145\/3404835.3463257"},{"key":"26_CR46","doi-asserted-by":"publisher","unstructured":"Sun, W., Fan, Y., Guo, J., Zhang, R., Cheng, X.: Visual named entity linking: a new dataset and a baseline. In: Findings of the Association for Computational Linguistics: EMNLP 2022. pp. 2403\u20132415. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates (Dec 2022). https:\/\/doi.org\/10.18653\/v1\/2022.findings-emnlp.178, https:\/\/aclanthology.org\/2022.findings-emnlp.178","DOI":"10.18653\/v1\/2022.findings-emnlp.178"},{"key":"26_CR47","doi-asserted-by":"crossref","unstructured":"Van Horn, G., et al.: The iNaturalist species classification and detection dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8769\u20138778 (2018). https:\/\/openaccess.thecvf.com\/content_cvpr_2018\/html\/Van_Horn_The_INaturalist_Species_CVPR_2018_paper.html","DOI":"10.1109\/CVPR.2018.00914"},{"key":"26_CR48","doi-asserted-by":"publisher","unstructured":"Vickers, P., Aletras, N., Monti, E., Barrault, L.. In: Factuality: efficient integration of relevant facts for visual question answering. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pp. 468\u2013475. Association for Computational Linguistics, Online (Aug 2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-short.60, https:\/\/aclanthology.org\/2021.acl-short.60","DOI":"10.18653\/v1\/2021.acl-short.60"},{"key":"26_CR49","doi-asserted-by":"publisher","unstructured":"Wang, Z., Ng, P., Ma, X., Nallapati, R., Xiang, B.: Multi-passage BERT: a globally normalized bert model for open-domain question answering. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 5878\u20135882. Association for Computational Linguistics, Hong Kong, China (Nov 2019). https:\/\/doi.org\/10.18653\/v1\/D19-1599, https:\/\/www.aclweb.org\/anthology\/D19-1599","DOI":"10.18653\/v1\/D19-1599"},{"key":"26_CR50","doi-asserted-by":"publisher","unstructured":"Weston, J., Chopra, S., Bordes, A.: Memory networks (2014). https:\/\/doi.org\/10.48550\/ARXIV.1410.3916, https:\/\/arxiv.org\/abs\/1410.3916","DOI":"10.48550\/ARXIV.1410.3916"},{"key":"26_CR51","doi-asserted-by":"crossref","unstructured":"Weyand, T., Araujo, A., Cao, B., Sim, J.: Google landmarks dataset v2 - A large-scale benchmark for instance-level recognition and retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2575\u20132584 (2020), https:\/\/openaccess.thecvf.com\/content_CVPR_2020\/html\/Weyand_Google_Landmarks_Dataset_v2_-_A_Large-Scale_Benchmark_for_Instance-Level_CVPR_2020_paper.html","DOI":"10.1109\/CVPR42600.2020.00265"},{"key":"26_CR52","unstructured":"Wilcke, W.X., Bloem, P., de Boer, V., Veer, R.H.v.t., van Harmelen, F.A.H.: End-to-End Entity Classification on Multimodal Knowledge Graphs. arXiv:2003.12383 [cs] (Mar 2020). http:\/\/arxiv.org\/abs\/2003.12383, arXiv: 2003.12383"},{"key":"26_CR53","unstructured":"Wolf, T., et al.: HuggingFace\u2019s Transformers: State-of-the-art Natural Language Processing. arXiv:1910.03771 [cs] (Jul 2020), http:\/\/arxiv.org\/abs\/1910.03771"},{"key":"26_CR54","doi-asserted-by":"crossref","unstructured":"Xie, R., Liu, Z., Luan, H., Sun, M.: Image-embodied knowledge representation learning. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence, pp. 3140\u20133146. IJCAI\u201917, AAAI Press, Melbourne, Australia (Aug 2017)","DOI":"10.24963\/ijcai.2017\/438"},{"key":"26_CR55","doi-asserted-by":"publisher","unstructured":"Xu, J., Croft, W.B.: Query expansion using local and global document analysis. In: Proceedings of the 19th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 4\u201311. SIGIR \u201996, Association for Computing Machinery, New York, NY, USA (1996). https:\/\/doi.org\/10.1145\/243199.243202","DOI":"10.1145\/243199.243202"},{"key":"26_CR56","doi-asserted-by":"publisher","unstructured":"Zamani, H., Diaz, F., Dehghani, M., Metzler, D., Bendersky, M.: Retrieval-enhanced machine learning. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2875\u20132886. SIGIR \u201922, Association for Computing Machinery, New York, NY, USA (Jul 2022). https:\/\/doi.org\/10.1145\/3477495.3531722","DOI":"10.1145\/3477495.3531722"},{"key":"26_CR57","doi-asserted-by":"crossref","unstructured":"Zhang, D., Cao, R., Wu, S.: Information fusion in visual question answering: a survey. Information Fusion 52, 268\u2013280 (2019). https:\/\/www.sciencedirect.com\/science\/article\/pii\/S1566253518308893","DOI":"10.1016\/j.inffus.2019.03.005"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-56027-9_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T07:12:22Z","timestamp":1710832342000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-56027-9_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031560262","9783031560279"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-56027-9_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"20 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 March 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 March 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.ecir2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"578","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"110","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"69","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31 (Tracks: Workshop, Tutorial, Industry, Doctoral Consortium)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}