{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T22:39:19Z","timestamp":1773527959788,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819620531","type":"print"},{"value":"9789819620548","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-2054-8_3","type":"book-chapter","created":{"date-parts":[[2025,1,2]],"date-time":"2025-01-02T15:45:56Z","timestamp":1735832756000},"page":"30-43","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Multi-expert Collaborative Framework for\u00a0Multimodal Named Entity Recognition"],"prefix":"10.1007","author":[{"given":"Bo","family":"Xu","sequence":"first","affiliation":[]},{"given":"Haiqi","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Shouang","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Ming","family":"Du","sequence":"additional","affiliation":[]},{"given":"Hui","family":"Song","sequence":"additional","affiliation":[]},{"given":"Hongya","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,3]]},"reference":[{"key":"3_CR1","unstructured":"Bai, J., et al.: Qwen-VL: a frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966 (2023)"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Bao, X., Tian, M., Zha, Z., Qin, B.: MPMRC-MNER: a unified MRC framework for multimodal named entity recognition based multimodal prompt. In: Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, pp. 47\u201356 (2023)","DOI":"10.1145\/3583780.3614975"},{"key":"3_CR3","unstructured":"Bian, J., Zheng, J., Zhang, Y., Zhu, S.: Inspire the large language model by external knowledge on biomedical named entity recognition. arXiv preprint arXiv:2309.12278 (2023)"},{"key":"3_CR4","unstructured":"Cai, C., et al.: In-context learning for few-shot multimodal named entity recognition. In: The 2023 Conference on Empirical Methods in Natural Language Processing (2023)"},{"key":"3_CR5","unstructured":"Chen, F., Feng, Y.: Chain-of-thought prompt distillation for multimodal named entity and multimodal relation extraction. arXiv preprint arXiv:2306.14122 (2023)"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Good visual guidance make a better extractor: hierarchical visual prefix for multimodal entity and relation extraction. In: Findings of the Association for Computational Linguistics: NAACL 2022, pp. 1607\u20131618 (2022)","DOI":"10.18653\/v1\/2022.findings-naacl.121"},{"key":"3_CR7","unstructured":"Hoffmann, J., et\u00a0al.: Training compute-optimal large language models. arXiv preprint arXiv:2203.15556 (2022)"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Hsieh, C.Y., et al.: Distilling step-by-step! outperforming larger language models with less training data and smaller model sizes. In: Findings of the Association for Computational Linguistics: ACL 2023, pp. 8003\u20138017 (2023)","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"3_CR9","unstructured":"Hu, E.J., et\u00a0al.: LORA: low-rank adaptation of large language models. In: International Conference on Learning Representations (2021)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Jia, M., et al.: MNER-QG: an end-to-end MRC framework for multimodal named entity recognition with query grounding. arXiv preprint arXiv:2211.14739 (2022)","DOI":"10.1609\/aaai.v37i7.25971"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Li, J., Li, H., Pan, Z., Pan, G.: Prompt ChatGPT in MNER: improved multimodal named entity recognition method based on auxiliary refining knowledge from ChatGPT. arXiv e-prints, pp. arXiv\u20132305 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.184"},{"key":"3_CR12","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. In: Thirty-seventh Conference on Neural Information Processing Systems (2023)"},{"key":"3_CR13","unstructured":"Liu, P., et al.: A novel framework for multimodal named entity recognition with multi-level alignments. arXiv preprint arXiv:2305.08372 (2023)"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Lu, D., Neves, L., Carvalho, V., Zhang, N., Ji, H.: Visual attention model for name tagging in multimodal social media. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, pp. 1990\u20131999 (2018)","DOI":"10.18653\/v1\/P18-1185"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Moon, S., Neves, L., Carvalho, V.: Multimodal named entity disambiguation for noisy social media posts. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, pp. 2000\u20132008 (2018)","DOI":"10.18653\/v1\/P18-1186"},{"key":"3_CR16","unstructured":"Team, G., et\u00a0al.: Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., Bai, S., Liang, P.P., Kolter, J.Z., Morency, L.P., Salakhutdinov, R.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the conference. Association for Computational Linguistics. Meeting, vol.\u00a02019, p.\u00a06558. NIH Public Access (2019)","DOI":"10.18653\/v1\/P19-1656"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: ITA: image-text alignments for multi-modal named entity recognition. arXiv preprint arXiv:2112.06482 (2021)","DOI":"10.18653\/v1\/2022.naacl-main.232"},{"key":"3_CR19","unstructured":"Wang, X., et al.: Self-consistency improves chain of thought reasoning in language models. arXiv preprint arXiv:2203.11171 (2022)"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: CAT-MNER: multimodal named entity recognition with knowledge-refined cross-modal attention. In: 2022 IEEE International Conference on Multimedia and Expo (ICME), pp.\u00a01\u20136. IEEE (2022)","DOI":"10.1109\/ICME52920.2022.9859972"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Wu, J., Gong, C., Cao, Z., Fu, G.: MCG-MNER: a multi-granularity cross-modality generative framework for multimodal NER with instruction. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 3209\u20133218 (2023)","DOI":"10.1145\/3581783.3612470"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Xu, B., Huang, S., Sha, C., Wang, H.: MAF: a general matching and alignment framework for multimodal named entity recognition. In: Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining. ACM (2022)","DOI":"10.1145\/3488560.3498475"},{"key":"3_CR23","unstructured":"Yang, Z., et al.: The dawn of LMMS: preliminary explorations with GPT-4V(ision). arXiv e-prints, pp. arXiv\u20132309 (2023)"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Yu, J., Jiang, J., Yang, L., Xia, R.: Improving multimodal named entity recognition via entity span detection with unified multimodal transformer. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.acl-main.306"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, D., Wei, S., Li, S., Wu, H., Zhu, Q., Zhou, G.: Multi-modal graph fusion for named entity recognition with targeted visual guidance. In: Proceedings of the AAAI Conference on Artificial Intelligence, no.\u00a016, pp. 14347\u201314355 (2021)","DOI":"10.1609\/aaai.v35i16.17687"},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Zheng, C., Wu, Z., Feng, J., Fu, Z., Cai, Y.: MNRE: a challenge multimodal dataset for neural relation extraction with visual evidence in social media posts. In: 2021 IEEE International Conference on Multimedia and Expo (ICME), pp.\u00a01\u20136. IEEE (2021)","DOI":"10.1109\/ICME51207.2021.9428274"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-2054-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,23]],"date-time":"2025-03-23T01:43:23Z","timestamp":1742694203000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-2054-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819620531","9789819620548"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-2054-8_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"3 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nara","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 January 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2025.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}