{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T20:49:40Z","timestamp":1771620580441,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T00:00:00Z","timestamp":1763510400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T00:00:00Z","timestamp":1763510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. King Saud Univ. Comput. Inf. Sci."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s44443-025-00355-1","type":"journal-article","created":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T10:20:31Z","timestamp":1763547631000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["TS-SLink: a two-stage span linking network for joint multimodal entity-relation extraction"],"prefix":"10.1007","volume":"37","author":[{"given":"Jinkang","family":"Zheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6410-2627","authenticated-orcid":false,"given":"Yahui","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guozhe","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenguo","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rongyi","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,19]]},"reference":[{"key":"355_CR1","first-page":"32897","volume":"35","author":"H Bao","year":"2022","unstructured":"Bao H, Wang W, Dong L, Liu Q, Mohammed OK, Aggarwal K, Som S, Piao S, Wei F (2022) Vlmo: Unified vision-language pre-training with mixture-of-modality-experts. Adv Neural Inf Process Syst 35:32897\u201332912","journal-title":"Adv Neural Inf Process Syst"},{"key":"355_CR2","doi-asserted-by":"crossref","unstructured":"Chen Z, Zhang Y, Fang Y, Geng Y, Guo L, Chen X, Li Q, Zhang W, Chen J, Zhu Y et al (2024) Knowledge graphs meet multi-modal learning: A comprehensive survey. arXiv:2402.05391","DOI":"10.2139\/ssrn.5044404"},{"key":"355_CR3","doi-asserted-by":"crossref","unstructured":"Chen X, Zhang N, Li L, Yao Y, Deng S, Tan C, Huang F, Si L, Chen H (2022) Good visual guidance makes a better extractor: Hierarchical visual prefix for multimodal entity and relation extraction. arXiv:2205.03521","DOI":"10.18653\/v1\/2022.findings-naacl.121"},{"key":"355_CR4","doi-asserted-by":"crossref","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2019) Bert: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, Volume 1 (long and Short Papers), pp 4171\u20134186","DOI":"10.18653\/v1\/N19-1423"},{"key":"355_CR5","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et al (2020) An image is worth 16x16 words: Transformers for image recognition at scale. arXiv:2010.11929"},{"key":"355_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.125608","volume":"262","author":"Y Gong","year":"2025","unstructured":"Gong Y, Lv X, Yuan Z, Hu F, Cai Z, Chen Y, Wang Z, You X (2025) Ce-dcvsi: Multimodal relational extraction based on collaborative enhancement of dual-channel visual semantic information. Expert Syst Appl 262:125608","journal-title":"Expert Syst Appl"},{"issue":"3","key":"355_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2024.104033","volume":"62","author":"S Huang","year":"2025","unstructured":"Huang S, Cai Y, Yuan L, Wang J (2025) A knowledge-enhanced network for joint multimodal entity-relation extraction. Inf Process Manag 62(3):104033","journal-title":"Inf Process Manag"},{"key":"355_CR8","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.126130","volume":"266","author":"W Jia","year":"2025","unstructured":"Jia W, Ma R, Yan L, Niu W, Ma Z (2025) Joint entity and relation extraction with table filling based on graph convolutional networks. Expert Syst Appl 266:126130","journal-title":"Expert Syst Appl"},{"issue":"5","key":"355_CR9","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/s10462-025-11122-z","volume":"58","author":"SA Jolfaei","year":"2025","unstructured":"Jolfaei SA, Mohebi A (2025) A review on persian question answering systems: from traditional to modern approaches. Artif Intell Rev 58(5):127","journal-title":"Artif Intell Rev"},{"key":"355_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2024.101749","volume":"90","author":"Y Kong","year":"2025","unstructured":"Kong Y, Yang Z, Ding Z, Liu W, Zhang S, Xu J, Lin H (2025) Tr-net: Token relation inspired table filling network for joint entity and relation extraction. Comput Speech Language 90:101749","journal-title":"Comput Speech Language"},{"key":"355_CR11","first-page":"9694","volume":"34","author":"J Li","year":"2021","unstructured":"Li J, Selvaraju R, Gotmare A, Joty S, Xiong C, Hoi SCH (2021) Align before fuse: Vision and language representation learning with momentum distillation. Adv Neural Inf Process Syst 34:9694\u20139705","journal-title":"Adv Neural Inf Process Syst"},{"key":"355_CR12","doi-asserted-by":"publisher","first-page":"16254","DOI":"10.1609\/aaai.v37i13.26987","volume":"37","author":"L Li","year":"2023","unstructured":"Li L, Chen X, Qiao S, Xiong F, Chen H, Zhang N (2023) On analyzing the role of image for visual-enhanced relation extraction (student abstract). Proceedings of the AAAI conference on artificial intelligence 37:16254\u201316255","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"key":"355_CR13","first-page":"17612","volume":"35","author":"VW Liang","year":"2022","unstructured":"Liang VW, Zhang Y, Kwon Y, Yeung S, Zou JY (2022) Mind the gap: Understanding the modality gap in multi-modal contrastive representation learning. Adv Neural Inf Process Syst 35:17612\u201317625","journal-title":"Adv Neural Inf Process Syst"},{"issue":"4","key":"355_CR14","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1023\/B:BTTJ.0000047600.45421.6d","volume":"22","author":"H Liu","year":"2004","unstructured":"Liu H, Singh P (2004) Conceptnet\u2013a practical commonsense reasoning tool-kit. BT Technol J 22(4):211\u2013226","journal-title":"BT Technol J"},{"key":"355_CR15","first-page":"4283","volume":"2024","author":"X Liu","year":"2024","unstructured":"Liu X, Hu C, Zhang R, Sun K, Mensah S, Mao Y (2024) Multimodal relation extraction via a mixture of hierarchical visual context learners. Proceedings of the ACM Web conference 2024:4283\u20134294","journal-title":"Proceedings of the ACM Web conference"},{"key":"355_CR16","doi-asserted-by":"crossref","unstructured":"Liu Y, Zhang K, Tong R, Cai C, Chen D, Wu X (2025) A two-stage boundary-enhanced contrastive learning approach for nested named entity recognition. Expert Syst Appl 126707","DOI":"10.1016\/j.eswa.2025.126707"},{"key":"355_CR17","doi-asserted-by":"crossref","unstructured":"Lopez I, Swaminathan A, Vedula K, Narayanan S, Nateghi\u00a0Haredasht F, Ma SP, Liang AS, Tate S, Maddali M, Gallo RJ et al (2025) Clinical entity augmented retrieval for clinical information extraction. npj Digital Med 8(1):45","DOI":"10.1038\/s41746-024-01377-1"},{"key":"355_CR18","doi-asserted-by":"crossref","unstructured":"Luo W, Xia Y, Tianshu S, Li S (2024) Shapley value-based contrastive alignment for multimodal information extraction. In: Proceedings of the 32nd ACM international conference on multimedia, pp 5270\u20135279","DOI":"10.1145\/3664647.3681367"},{"key":"355_CR19","doi-asserted-by":"crossref","unstructured":"Manning CD, Surdeanu M, Bauer J, Finkel JR, Bethard S, McClosky D (2014) The stanford corenlp natural language processing toolkit. In: Proceedings of 52nd annual meeting of the association for computational linguistics: system demonstrations, pp 55\u201360","DOI":"10.3115\/v1\/P14-5010"},{"key":"355_CR20","unstructured":"Radford A, Kim J.W, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J et al (2021) Learning transferable visual models from natural language supervision. In: International conference on machine learning, PmLR, pp 8748\u20138763"},{"key":"355_CR21","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: Towards real-time object detection with region proposal networks. Adv Neural Inf Process Syst 28"},{"key":"355_CR22","doi-asserted-by":"publisher","first-page":"11285","DOI":"10.1609\/aaai.v36i10.21379","volume":"36","author":"Y-M Shang","year":"2022","unstructured":"Shang Y-M, Huang H, Mao X (2022) Onerel: Joint entity and relation extraction with one module in one step. Proceedings of the AAAI Conference on Artificial Intelligence 36:11285\u201311293","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"355_CR23","doi-asserted-by":"crossref","unstructured":"Srija C, Padmaja B, Smrithi Y, Karthikeya M (2024) Vit-gpt2: Vision transformer based automatic image captioning. In: International advanced computing conference, Springer, pp 20\u201332","DOI":"10.1007\/978-3-031-84602-1_2"},{"key":"355_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.128726","volume":"613","author":"L Sun","year":"2025","unstructured":"Sun L, Zhang P, Gao F, An Y, Li Z, Zhao Y (2025) Sf-gpt: A training-free method to enhance capabilities for knowledge graph construction in llms. Neurocomputing 613:128726","journal-title":"Neurocomputing"},{"key":"355_CR25","unstructured":"Swarup A, Pan T, Wilson R, Bhandarkar A, Woodard D (2025) Llm4re: A data-centric feasibility study for relation extraction. In: Proceedings of the 31st international conference on computational linguistics, pp 6670\u20136691"},{"key":"355_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112504","volume":"304","author":"G Wang","year":"2024","unstructured":"Wang G, Liu J, Xie J, Zhu Z, Zhou F (2024) Joint multimodal entity-relation extraction based on temporal enhancement and similarity-gated attention. Knowl-Based Syst 304:112504","journal-title":"Knowl-Based Syst"},{"key":"355_CR27","doi-asserted-by":"crossref","unstructured":"Wang Y, Yu B, Zhang Y, Liu T, Zhu H, Sun L (2020) Tplinker: Single-stage joint extraction of entities and relations through token pair linking. arXiv:2010.13415","DOI":"10.18653\/v1\/2020.coling-main.138"},{"key":"355_CR28","doi-asserted-by":"crossref","unstructured":"Wu S, Fei H, Cao Y, Bing L, Chua T-S (2023) Information screening whilst exploiting! multimodal relation extraction with feature denoising and multimodal topic modeling. arXiv:2305.11719","DOI":"10.18653\/v1\/2023.acl-long.823"},{"key":"355_CR29","doi-asserted-by":"crossref","unstructured":"Wu Z, Zheng C, Cai Y, Chen J, Leung H-f, Li Q (2020) Multimodal representation with embedded visual guiding objects for named entity recognition in social media posts. In: Proceedings of the 28th ACM international conference on multimedia, pp 1038\u20131046","DOI":"10.1145\/3394171.3413650"},{"key":"355_CR30","doi-asserted-by":"crossref","unstructured":"Yang X, Gong X, Tang B, Lei Y, Deng Y, Ouyang H, Zhao G, Luo L, Feng Y, Duan B et al (2024) Cag: A consistency-adaptive text-image alignment generation for joint multimodal entity-relation extraction. In: Proceedings of the 33rd ACM international conference on information and knowledge management, pp 4183\u20134187","DOI":"10.1145\/3627673.3679883"},{"key":"355_CR31","doi-asserted-by":"crossref","unstructured":"Yang Z, Gong B, Wang L, Huang W, Yu D, Luo J (2019) A fast and accurate one-stage approach to visual grounding. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 4683\u20134693","DOI":"10.1109\/ICCV.2019.00478"},{"key":"355_CR32","unstructured":"Ye J, Chen X, Xu N, Zu C, Shao Z, Liu S, Cui Y, Zhou Z, Gong C, Shen Y et al (2023) A comprehensive capability analysis of gpt-3 and gpt-3.5 series models. arXiv:2303.10420"},{"key":"355_CR33","doi-asserted-by":"publisher","first-page":"11051","DOI":"10.1609\/aaai.v37i9.26309","volume":"37","author":"L Yuan","year":"2023","unstructured":"Yuan L, Cai Y, Wang J, Li Q (2023) Joint multimodal entity-relation extraction based on edge-enhanced graph alignment network and word-pair relation tagging. Proceedings of the AAAI conference on artificial intelligence 37:11051\u201311059","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"key":"355_CR34","first-page":"469","volume":"2024","author":"H Yuan","year":"2024","unstructured":"Yuan H, Sun Q, Fu X, Ji C, Li J (2024) Dynamic graph information bottleneck. Proceedings of the ACM web conference 2024:469\u2013480","journal-title":"Proceedings of the ACM web conference"},{"key":"355_CR35","doi-asserted-by":"crossref","unstructured":"Yuan L, Cai Y, Xu J, Li Q, Wang T (2024) A fine-grained network for joint multimodal entity-relation extraction. IEEE Trans Knowl Data Eng","DOI":"10.1109\/TKDE.2024.3485107"},{"key":"355_CR36","doi-asserted-by":"crossref","unstructured":"Yu J, Jiang J, Yang L, Xia R (2020) Improving multimodal named entity recognition via entity span detection with unified multimodal transformer. Association for Computational Linguistics","DOI":"10.18653\/v1\/2020.acl-main.306"},{"key":"355_CR37","doi-asserted-by":"publisher","first-page":"14347","DOI":"10.1609\/aaai.v35i16.17687","volume":"35","author":"D Zhang","year":"2021","unstructured":"Zhang D, Wei S, Li S, Wu H, Zhu Q, Zhou G (2021) Multi-modal graph fusion for named entity recognition with targeted visual guidance. Proceedings of the AAAI conference on artificial intelligence 35:14347\u201314355","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"key":"355_CR38","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.121787","volume":"699","author":"Y Zhang","year":"2025","unstructured":"Zhang Y, Li G, Gao H, Dang D (2025) Multi-scale interaction network for multimodal entity and relation extraction. Inf Sci 699:121787","journal-title":"Inf Sci"},{"key":"355_CR39","doi-asserted-by":"crossref","unstructured":"Zhang Q, Fu J, Liu X, Huang X (2018) Adaptive co-attention network for named entity recognition in tweets. In: Proceedings of the AAAI conference on artificial intelligence, vol 32","DOI":"10.1609\/aaai.v32i1.11962"},{"key":"355_CR40","doi-asserted-by":"crossref","unstructured":"Zhang X, Yuan J, Li L, Liu J (2023) Reducing the bias of visual objects in multimodal named entity recognition. In: Proceedings of the Sixteenth ACM international conference on web search and data mining, pp 958\u2013966","DOI":"10.1145\/3539597.3570485"},{"issue":"3","key":"355_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2023.103264","volume":"60","author":"Q Zhao","year":"2023","unstructured":"Zhao Q, Gao T, Guo N (2023) Tsvfn: Two-stage visual fusion network for multimodal relation extraction. Inf Process Manag 60(3):103264","journal-title":"Inf Process Manag"},{"key":"355_CR42","doi-asserted-by":"crossref","unstructured":"Zheng C, Wu Z, Wang T, Cai Y, Li Q (2020) Object-aware multimodal named entity recognition in social media posts with adversarial learning. IEEE Trans Multimed 23:2520\u20132532","DOI":"10.1109\/TMM.2020.3013398"},{"key":"355_CR43","doi-asserted-by":"crossref","unstructured":"Zheng C, Feng J, Fu Z, Cai Y, Li Q, Wang T (2021) Multimodal relation extraction with efficient graph alignment. In: Proceedings of the 29th ACM international conference on multimedia, pp 5298\u20135306","DOI":"10.1145\/3474085.3476968"}],"container-title":["Journal of King Saud University Computer and Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44443-025-00355-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s44443-025-00355-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44443-025-00355-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T18:47:39Z","timestamp":1767638859000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s44443-025-00355-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":43,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["355"],"URL":"https:\/\/doi.org\/10.1007\/s44443-025-00355-1","relation":{},"ISSN":["1319-1578","2213-1248"],"issn-type":[{"value":"1319-1578","type":"print"},{"value":"2213-1248","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"24 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}],"article-number":"324"}}