{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T11:31:42Z","timestamp":1763811102318,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031306747"},{"type":"electronic","value":"9783031306754"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-30675-4_45","type":"book-chapter","created":{"date-parts":[[2023,4,14]],"date-time":"2023-04-14T10:02:24Z","timestamp":1681466544000},"page":"607-622","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Multimodal Entity Linking with\u00a0Mixed Fusion Mechanism"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8342-5834","authenticated-orcid":false,"given":"Gongrui","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3583-3569","authenticated-orcid":false,"given":"Chenghuan","family":"Jiang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0821-8330","authenticated-orcid":false,"given":"Zhongheng","family":"Guan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8782-857X","authenticated-orcid":false,"given":"Peng","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,4,15]]},"reference":[{"key":"45_CR1","doi-asserted-by":"crossref","unstructured":"Adjali, O., Besan\u00e7on, R., Ferret, O., et al.: Multimodal entity linking for tweets. In: ECIR (2020)","DOI":"10.1007\/978-3-030-45439-5_31"},{"key":"45_CR2","doi-asserted-by":"crossref","unstructured":"Blanco, R., Ottaviano, G., Meij, E.: Fast and space-efficient entity linking for queries. In: WSDM (2015)","DOI":"10.1145\/2684822.2685317"},{"key":"45_CR3","unstructured":"Bunescu, R., Pasca, M.: Using encyclopedic knowledge for named entity disambiguation. In: EACL (2006)"},{"key":"45_CR4","unstructured":"Chen, T., Kornblith, S., Norouzi, M., et al.: A simple framework for contrastive learning of visual representations. In: ICML (2020)"},{"key":"45_CR5","doi-asserted-by":"crossref","unstructured":"Cho, K., van Merri\u00ebnboer, B., Gulcehre, C., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: EMNLP (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"45_CR6","unstructured":"Cucerzan, S.: Large-scale named entity disambiguation based on Wikipedia data. In: EMNLP-CoNLL (2007)"},{"key":"45_CR7","doi-asserted-by":"crossref","unstructured":"Daher, H., Besan\u00e7on, R., Ferret, O., et al.: Supervised learning of entity disambiguation models by negative sample selection. In: CICLing (2017)","DOI":"10.1007\/978-3-319-77113-7_26"},{"key":"45_CR8","unstructured":"Devlin, J., Chang, M.W., Lee, K., et al.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL (2019)"},{"key":"45_CR9","doi-asserted-by":"crossref","unstructured":"Dolmans, T.C., Poel, M., van \u2019t Klooster, J.W.J., et al.: Perceived mental workload classification using intermediate fusion multimodal deep learning. Front. Hum. Neurosci. 14 (2021)","DOI":"10.3389\/fnhum.2020.609096"},{"key":"45_CR10","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"45_CR11","doi-asserted-by":"crossref","unstructured":"Dzogang, F., Lesot, M.J., Rifqi, M., et al.: Early fusion of low level features for emotion mining. Biomed. Inform. Insights 5, BII-S8973 (2012)","DOI":"10.4137\/BII.S8973"},{"key":"45_CR12","doi-asserted-by":"crossref","unstructured":"Eshel, Y., Cohen, N., Radinsky, K., et al.: Named entity disambiguation for noisy text. In: CoNLL (2017)","DOI":"10.18653\/v1\/K17-1008"},{"key":"45_CR13","doi-asserted-by":"crossref","unstructured":"Gan, J., Luo, J., Wang, H., et al.: Multimodal entity linking: a new dataset and a baseline. In: MM (2021)","DOI":"10.1145\/3474085.3475400"},{"key":"45_CR14","doi-asserted-by":"crossref","unstructured":"Globerson, A., Lazic, N., Chakrabarti, S., et al.: Collective entity resolution with multi-focal attention. In: ACL (2016)","DOI":"10.18653\/v1\/P16-1059"},{"key":"45_CR15","doi-asserted-by":"crossref","unstructured":"Guo, Z., Barbosa, D.: Entity linking with a unified semantic representation. In: WWW (2014)","DOI":"10.1145\/2567948.2579705"},{"key":"45_CR16","unstructured":"Hoffart, J., Yosef, M.A., Bordino, I., et al.: Robust disambiguation of named entities in text. In: EMNLP (2011)"},{"key":"45_CR17","doi-asserted-by":"crossref","unstructured":"Khan, S., Naseer, M., Hayat, M., et al.: Transformers in vision: a survey. ACM Comput. Surv. 54(10s) (2022)","DOI":"10.1145\/3505244"},{"key":"45_CR18","unstructured":"Kim, W., Son, B., Kim, I.: Vilt: vision-and-language transformer without convolution or region supervision. In: ICML (2021)"},{"key":"45_CR19","doi-asserted-by":"crossref","unstructured":"Lemnaru, C., Potolea, R.: Imbalanced classification problems: systematic study, issues and best practices. In: ICEIS (2012)","DOI":"10.1007\/978-3-642-29958-2_3"},{"key":"45_CR20","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2019)"},{"key":"45_CR21","unstructured":"Lu, J., Yang, J., Batra, D., et al.: Hierarchical question-image co-attention for visual question answering. In: NIPS (2016)"},{"key":"45_CR22","doi-asserted-by":"crossref","unstructured":"Moon, S., Neves, L., Carvalho, V.: Multimodal named entity disambiguation for noisy social media posts. In: ACL (2018)","DOI":"10.18653\/v1\/P18-1186"},{"key":"45_CR23","unstructured":"Nagrani, A., Yang, S., Arnab, A., et al.: Attention bottlenecks for multimodal fusion. In: NIPS (2021)"},{"key":"45_CR24","unstructured":"Radford, A., Kim, J.W., Hallacy, C., et al.: Learning transferable visual models from natural language supervision. In: ICML (2021)"},{"issue":"2","key":"45_CR25","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1109\/TKDE.2014.2327028","volume":"27","author":"W Shen","year":"2015","unstructured":"Shen, W., Wang, J., Han, J.: Entity linking with a knowledge base: issues, techniques, and solutions. IEEE Trans. Knowl. Data Eng. 27(2), 443\u2013460 (2015)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"45_CR26","doi-asserted-by":"crossref","unstructured":"Singh, A., Hu, R., Goswami, V., et al.: Flava: a foundational language and vision alignment model. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01519"},{"key":"45_CR27","doi-asserted-by":"crossref","unstructured":"Snoek, C.G.M., Worring, M., Smeulders, A.W.M.: Early versus late fusion in semantic video analysis. In: MM (2005)","DOI":"10.1145\/1101149.1101236"},{"key":"45_CR28","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et al.: Attention is all you need. In: NIPS (2017)"},{"key":"45_CR29","doi-asserted-by":"crossref","unstructured":"Wang, P., Wu, J., Chen, X.: Multimodal entity linking with gated hierarchical fusion and contrastive training. In: SIGIR (2022)","DOI":"10.1145\/3477495.3531867"},{"key":"45_CR30","doi-asserted-by":"crossref","unstructured":"Wang, Y., Shen, Y., Liu, Z., et al.: Words can shift: dynamically adjusting word representations using nonverbal behaviors. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33017216"},{"key":"45_CR31","doi-asserted-by":"crossref","unstructured":"Wolf, T., Debut, L., Sanh, V., et al.: Transformers: state-of-the-art natural language processing. In: EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"45_CR32","doi-asserted-by":"crossref","unstructured":"Wu, L., Petroni, F., Josifoski, M., et al.: Scalable zero-shot entity linking with dense entity retrieval. In: EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.519"},{"key":"45_CR33","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1162\/tacl_a_00166","volume":"2","author":"P Young","year":"2014","unstructured":"Young, P., Lai, A., Hodosh, M., et al.: From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. TACL 2, 67\u201378 (2014)","journal-title":"TACL"},{"key":"45_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, L., Li, Z., Yang, Q.: Attention-based multimodal entity linking with high-quality images. In: DASFAA (2021)","DOI":"10.1007\/978-3-030-73197-7_35"},{"key":"45_CR35","doi-asserted-by":"crossref","unstructured":"Zhou, X., Wang, P., Li, G., et al.: Weibo-mel, Wikidata-mel and Richpedia-mel: multimodal entity linking benchmark datasets. In: CCKS (2021)","DOI":"10.1007\/978-981-16-6471-7_27"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-30675-4_45","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T12:17:07Z","timestamp":1710245827000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-30675-4_45"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031306747","9783031306754"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-30675-4_45","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"15 April 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 April 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 April 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.tjudb.cn\/dasfaa2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"652","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"125","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7.3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}