{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T21:25:11Z","timestamp":1757625911060,"version":"3.44.0"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"13","license":[{"start":{"date-parts":[[2025,8,28]],"date-time":"2025-08-28T00:00:00Z","timestamp":1756339200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,28]],"date-time":"2025-08-28T00:00:00Z","timestamp":1756339200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"crossref","award":["No. 62006108"],"award-info":[{"award-number":["No. 62006108"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100010031","name":"Postdoctoral Research Foundation of China","doi-asserted-by":"publisher","award":["No. 2022M710593"],"award-info":[{"award-number":["No. 2022M710593"]}],"id":[{"id":"10.13039\/501100010031","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Special funding for basic scientific research business expenses of undergraduate universities in Liaoning Province","award":["No. LJ212410165031"],"award-info":[{"award-number":["No. LJ212410165031"]}]},{"name":"Liaoning Province Educational Science \"14th Five-Year Plan\" Project"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07772-z","type":"journal-article","created":{"date-parts":[[2025,8,28]],"date-time":"2025-08-28T09:53:30Z","timestamp":1756374810000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing multimodal named entity recognition with multi-granularity knowledge distillation"],"prefix":"10.1007","volume":"81","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0482-7430","authenticated-orcid":false,"given":"Xinyu","family":"He","sequence":"first","affiliation":[]},{"given":"Shixin","family":"Li","sequence":"additional","affiliation":[]},{"given":"Binhe","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,28]]},"reference":[{"key":"7772_CR1","doi-asserted-by":"crossref","unstructured":"Moon S, Neves L, Carvalho V (2018) Multimodal named entity recognition for short social media posts. Proceedings of NAACL. 852\u2013860","DOI":"10.18653\/v1\/N18-1078"},{"key":"7772_CR2","unstructured":"Huang Z, Xu W, Yu K (2015) Bidirectional LSTM-CRF models for sequence tagging. CoRR, abs\/1508.01991."},{"key":"7772_CR3","doi-asserted-by":"crossref","unstructured":"Lu D, Neves L, Carvalho V, et al. (2018) Visual attention model for name tagging in multimodal social media. Proceedings of ACL. 1990\u20131999","DOI":"10.18653\/v1\/P18-1185"},{"key":"7772_CR4","unstructured":"Krizhevsky A, Sutskever I, Hinton G E. (2012) Imagenet classification with deep convolutional neural networks. Proceedings of NIPS. 1097\u20131105"},{"key":"7772_CR5","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9:1735\u20131780","journal-title":"Neural Comput"},{"key":"7772_CR6","doi-asserted-by":"crossref","unstructured":"Zhang Q, Fu J, Liu X, et al. (2018) Adaptive co-attention network for named entity recognition in tweets. McIlraith S A, Weinberger K Q. Proceedings of AAAI. 5674\u20135681","DOI":"10.1609\/aaai.v32i1.11962"},{"key":"7772_CR7","doi-asserted-by":"crossref","unstructured":"Yu J, Jiang J, Yang L, et al. (2020) Improving multimodal named entity recognition via entity span detection with unified multimodal transformer. Proceedings of ACL. 3342\u20133352","DOI":"10.18653\/v1\/2020.acl-main.306"},{"key":"7772_CR8","unstructured":"Vaswani A, Shazeer N, Parmar N, et al. (2017) Attention is all you need. Processing of NIPS. 5998\u20136008"},{"key":"7772_CR9","doi-asserted-by":"crossref","unstructured":"Sun L, Wang J, Su Y, et al. (2020) RIVA: A pre-trained tweet multimodal model based on textimage relation for multimodal NER. Proceedings of COLING. 1852\u20131862","DOI":"10.18653\/v1\/2020.coling-main.168"},{"key":"7772_CR10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i15.17633","author":"L Sun","year":"2021","unstructured":"Sun L, Wang J, Zhang K et al (2021) Rpbert: a text-image relation propagation-based BERT model for multimodal NER. Proc AAAI Conf Artif Intell. https:\/\/doi.org\/10.1609\/aaai.v35i15.17633","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"7772_CR11","unstructured":"Devlin J, Chang M W, Lee K, et al. (2018) Bert: pre-training of deep bidirectional transformers for language understanding"},{"issue":"16","key":"7772_CR12","first-page":"14347","volume":"35","author":"D Zhang","year":"2021","unstructured":"Zhang D, Wei S, Li S et al (2021) Multi-modal graph fusion for named entity recognition with targeted visual guidance. Proc AAAI Conf Artif Intell 35(16):14347\u201314355","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"7772_CR13","doi-asserted-by":"crossref","unstructured":"Chen X, Zhang N, Li L, et al. (2022) Good visual guidance makes a better extractor: hierarchical visual prefix for multimodal entity and relation extraction.","DOI":"10.18653\/v1\/2022.findings-naacl.121"},{"issue":"4","key":"7772_CR14","doi-asserted-by":"publisher","first-page":"545","DOI":"10.1587\/transinf.2022EDP7116","volume":"106","author":"P Wang","year":"2023","unstructured":"Wang P, Chen X, Shang Z et al (2023) Multimodal named entity recognition with bottleneck fusion and contrastive learning. IEICE Trans Inf Syst 106(4):545\u2013555","journal-title":"IEICE Trans Inf Syst"},{"key":"7772_CR15","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1016\/j.patrec.2023.10.004","volume":"175","author":"Z Chen","year":"2023","unstructured":"Chen Z, Zhang Y, Mi S (2023) Assisting multimodal named entity recognition by cross-modal auxiliary tasks. Pattern Recogn Lett 175:52\u201358","journal-title":"Pattern Recogn Lett"},{"key":"7772_CR16","doi-asserted-by":"crossref","unstructured":"Zhang X, Yuan J, Li L, et al. (2023) Reducing the bias of visual objects in multimodal named entity recognition\/\/Proceedings of the Sixteenth ACM international Conference on web search and data mining. 958\u2013966","DOI":"10.1145\/3539597.3570485"},{"issue":"1","key":"7772_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2023.103546","volume":"61","author":"P Liu","year":"2024","unstructured":"Liu P, Wang G, Li H et al (2024) Multi-granularity cross-modal representation learning for named entity recognition on social media. Inf Process Manage 61(1):103546","journal-title":"Inf Process Manage"},{"key":"7772_CR18","doi-asserted-by":"crossref","unstructured":"Yu J, Li Z, Wang J, et al. Grounded multimodal named entity recognition on social media\/\/Proceedings of the 61st annual meeting of the association for computational linguistics (Volume 1: Long Papers). 9141\u20139154","DOI":"10.18653\/v1\/2023.acl-long.508"},{"key":"7772_CR19","unstructured":"Lafferty J, McCallum A, Pereira F C N. (2001) Conditional random fields: probabilistic models for segmenting and labeling sequence data"},{"key":"7772_CR20","unstructured":"Honnibal M, Montani I, Van Landeghem S, et al. (2020) spaCy: Industrial-strength natural language processing in python"},{"key":"7772_CR21","doi-asserted-by":"crossref","unstructured":"Rombach R, Blattmann A, Lorenz D, et al. (2022) High-resolution image synthesis with latent diffusion models\/\/Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition. 10684\u201310695","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"7772_CR22","unstructured":"OpenAI. (2023).\u00a0ChatGPT\u00a0(October 18 version) [Large language model]. https:\/\/chat.openai.com"},{"key":"7772_CR23","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, et al. (2020) An image is worth 16x16 words: transformers for image recognition at scale"},{"issue":"5","key":"7772_CR24","first-page":"4766","volume":"38","author":"H Shen","year":"2024","unstructured":"Shen H, Zhao T, Zhu M et al (2024) Groundvlp: harnessing zero-shot visual grounding from vision-language pre-training and open-vocabulary object detection. Proc AAAI Conf Artif Intell 38(5):4766\u20134775","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"7772_CR25","unstructured":"Xuezhe M, Eduard H (2016) End-to-end sequence labeling via bi-directional lstm-cnns-crf. In 6819 Proceedings of the 54th annual meeting of the association for computational linguistics (Volume 1: Long Papers), 1064\u20131074"},{"key":"7772_CR26","unstructured":"Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D (2016) Neural architectures for named entity recognition. In Proceedings of NAACL-HLT, 260\u2013270."},{"key":"7772_CR27","doi-asserted-by":"crossref","unstructured":"Arshad O,Gallo I,Nawaz S,et al. (2019) Aiding intratext representations with visual context for multimodal named entity recognition. In: 2019 International Conference on document analysis and recognition (ICDAR).Sydney, NSW, Australia:IEEE, 337\u2013342","DOI":"10.1109\/ICDAR.2019.00061"},{"key":"7772_CR28","doi-asserted-by":"crossref","unstructured":"Li J, Li H, Pan Z, et al. Prompting chatgpt in MNER: enhanced multimodal named entity recognition with auxiliary refined knowledge\/\/The 2023 Conference on empirical methods in natural language processing. 2023","DOI":"10.18653\/v1\/2023.findings-emnlp.184"},{"key":"7772_CR29","unstructured":"Liunian HL, Mark Y, Da Y, Cho-Jui H, Kai-Wei C (2019) Visualbert: a simple and performant baseline for vision and language"},{"key":"7772_CR30","doi-asserted-by":"crossref","unstructured":"Zhiwei Wu, Changmeng Zheng, Yi Cai, Junying Chen, Ho-fung Leung, and Qing Li. 2020. Multimodal representation with embedded visual guiding objects for named entity recognition in social media posts. In: Proceedings of the 28th ACM international Conference on multimedia, 1038\u20131046.","DOI":"10.1145\/3394171.3413650"},{"key":"7772_CR31","first-page":"6293","volume":"2022","author":"B Zhou","year":"2022","unstructured":"Zhou B, Zhang Y, Song K et al (2022) A Span-based multimodal variational autoencoder for semi-supervised multimodal named entity recognition\/\/proceedings of the. Conf Emp Methods Nat Lang Process 2022:6293\u20136302","journal-title":"Conf Emp Methods Nat Lang Process"},{"key":"7772_CR32","doi-asserted-by":"crossref","unstructured":"Zhao F, Li C, Wu Z, et al. (2022) Learning from different text-image pairs: a relation-enhanced graph convolutional network for multimodal NER\/\/Proceedings of the 30th ACM international Conference on multimedia. 3983\u20133992","DOI":"10.1145\/3503161.3548228"},{"issue":"4","key":"7772_CR33","doi-asserted-by":"publisher","first-page":"4109","DOI":"10.1007\/s10489-021-02546-5","volume":"52","author":"L Liu","year":"2022","unstructured":"Liu L, Wang M, Zhang M et al (2022) Uamner: uncertainty-aware multimodal named entity recognition in social media posts. Appl Intell 52(4):4109\u20134125","journal-title":"Appl Intell"},{"key":"7772_CR34","doi-asserted-by":"publisher","first-page":"1274","DOI":"10.1109\/TASLP.2023.3345146","volume":"32","author":"S Cui","year":"2024","unstructured":"Cui S, Cao J, Cong X et al (2024) Enhancing multimodal entity and relation extraction with variational information bottleneck. IEEE\/ACM Trans Audio Speech Lang Process 32:1274\u20131285","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"7772_CR35","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, et al. (2014) Microsoft coco: common objects in context\/\/Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13. Springer International Publishing, 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07772-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07772-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07772-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T19:55:36Z","timestamp":1757447736000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07772-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,28]]},"references-count":35,"journal-issue":{"issue":"13","published-online":{"date-parts":[[2025,8]]}},"alternative-id":["7772"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07772-z","relation":{},"ISSN":["1573-0484"],"issn-type":[{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2025,8,28]]},"assertion":[{"value":"18 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"1283"}}