{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:00:04Z","timestamp":1775815204181,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272250,62302243"],"award-info":[{"award-number":["62272250,62302243"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation of Tianjin, China","award":["22JCJQJC00150,23JCYBJC01230"],"award-info":[{"award-number":["22JCJQJC00150,23JCYBJC01230"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1145\/3696410.3714832","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:52:18Z","timestamp":1745362338000},"page":"4823-4831","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Multimodal Graph-Based Variational Mixture of Experts Network for Zero-Shot Multimodal Information Extraction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7577-6204","authenticated-orcid":false,"given":"Baohang","family":"Zhou","sequence":"first","affiliation":[{"name":"College of Computer Science, VCIP, DISSec, TMCC, TBI Center, Nankai University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4906-5828","authenticated-orcid":false,"given":"Ying","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Computer Science, VCIP, DISSec, TMCC, TBI Center, Nankai University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0326-7152","authenticated-orcid":false,"given":"Yu","family":"Zhao","sequence":"additional","affiliation":[{"name":"College of Computer Science, VCIP, DISSec, TMCC, TBI Center, Nankai University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5386-9912","authenticated-orcid":false,"given":"Xuhui","family":"Sui","sequence":"additional","affiliation":[{"name":"College of Computer Science, VCIP, DISSec, TMCC, TBI Center, Nankai University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5876-6856","authenticated-orcid":false,"given":"Xiaojie","family":"Yuan","sequence":"additional","affiliation":[{"name":"College of Computer Science, VCIP, DISSec, TMCC, TBI Center, Nankai University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"VLMo: Unified Vision-Language Pre-Training with Mixture-of-Modality-Experts. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022","author":"Bao Hangbo","year":"2022","unstructured":"Hangbo Bao, Wenhui Wang, Li Dong, Qiang Liu, Owais Khan Mohammed, Kriti Aggarwal, Subhojit Som, Songhao Piao, and Furu Wei. 2022. VLMo: Unified Vision-Language Pre-Training with Mixture-of-Modality-Experts. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.272"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.210"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3345146"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2--7, 2019, Volume 1 (Long and Short Papers). Association for Computational Linguistics, 4171--4186."},{"key":"e_1_3_2_1_6_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation, LREC\/COLING 2024","author":"Gong Jiaying","year":"2024","unstructured":"Jiaying Gong and Hoda Eldardiry. 2024. Prompt-based Zero-shot Relation Extraction with Semantic Knowledge Augmentation. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation, LREC\/COLING 2024, 20--25 May, 2024, Torino, Italy. ELRA and ICCL, 13143--13156."},{"key":"e_1_3_2_1_8_1","volume-title":"Explaining and Harnessing Adversarial Examples. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings.","author":"Goodfellow Ian J.","year":"2015","unstructured":"Ian J. Goodfellow, Jonathon Shlens, and Christian Szegedy. 2015. Explaining and Harnessing Adversarial Examples. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings."},{"key":"e_1_3_2_1_9_1","volume-title":"Auto-Encoding Variational Bayes. In 2nd International Conference on Learning Representations, ICLR 2014, Banff, AB, Canada, April 14--16, 2014, Conference Track Proceedings.","author":"Diederik","unstructured":"Diederik P. Kingma and Max Welling. 2014. Auto-Encoding Variational Bayes. In 2nd International Conference on Learning Representations, ICLR 2014, Banff, AB, Canada, April 14--16, 2014, Conference Track Proceedings."},{"key":"e_1_3_2_1_10_1","volume-title":"Visual Instruction Tuning. In Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual Instruction Tuning. In Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023."},{"key":"e_1_3_2_1_11_1","volume-title":"COLING","author":"Ma Yukun","year":"2016","unstructured":"Yukun Ma, Erik Cambria, and Sa Gao. 2016. Label Embedding for Zero-shot Fine-grained Named Entity Typing. In COLING 2016, 26th International Conference on Computational Linguistics, Proceedings of the Conference: Technical Papers, December 11--16, 2016, Osaka, Japan. ACL, 171--180."},{"key":"e_1_3_2_1_12_1","first-page":"2579","article-title":"Visualizing Data using t-SNE","volume":"9","author":"Der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van Der Maaten and Hinton Geoffrey. 2008. Visualizing Data using t-SNE. Journal of Machine Learning Research, Vol. 9, 2605 (2008), 2579--2605.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2858821"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1078"},{"key":"e_1_3_2_1_15_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems","author":"Mustafa Basil","year":"2022","unstructured":"Basil Mustafa, Carlos Riquelme, Joan Puigcerver, Rodolphe Jenatton, and Neil Houlsby. 2022. Multimodal Contrastive Learning with LIMoE: the Language-Image Mixture of Experts. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1087"},{"key":"e_1_3_2_1_17_1","volume-title":"Neural Zero-Shot Fine-Grained Entity Typing. In Companion of The 2020 Web Conference 2020","author":"Ren Yankun","year":"2020","unstructured":"Yankun Ren, Jianbin Lin, and Jun Zhou. 2020. Neural Zero-Shot Fine-Grained Entity Typing. In Companion of The 2020 Web Conference 2020, Taipei, Taiwan, April 20--24, 2020. ACM \/ IW3C2, 846--847."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ITW.2015.7133169"},{"key":"e_1_3_2_1_19_1","volume-title":"LLaMA: Open and Efficient Foundation Language Models. CoRR","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, Aur\u00e9lien Rodriguez, Armand Joulin, Edouard Grave, and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. CoRR, Vol. abs\/2302.13971 (2023)."},{"key":"e_1_3_2_1_20_1","volume-title":"Pan","author":"Wan Hai","year":"2021","unstructured":"Hai Wan, Manrong Zhang, Jianfeng Du, Ziling Huang, Yufei Yang, and Jeff Z. Pan. 2021. FL-MSRE: A Few-Shot Learning based Approach to Multimodal Social Relation Extraction. In Thirty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI 2021, Virtual Event, February 2--9, 2021. AAAI Press, 13916--13923."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.328"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Bo Xu Shizhou Huang Chaofeng Sha and Hongya Wang. 2022. MAF: A General Matching and Alignment Framework for Multimodal Named Entity Recognition. In WSDM '22: The Fifteenth ACM International Conference on Web Search and Data Mining Virtual Event \/ Tempe AZ USA February 21 - 25 2022. ACM 1215--1223.","DOI":"10.1145\/3488560.3498475"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 28th International Conference on Computational Linguistics, COLING 2020, Barcelona, Spain (Online), December 8--13","author":"Zhang Tao","year":"2020","unstructured":"Tao Zhang, Congying Xia, Chun-Ta Lu, and Philip S. Yu. 2020. MZET: Memory Augmented Zero-Shot Fine-grained Named Entity Typing. In Proceedings of the 28th International Conference on Computational Linguistics, COLING 2020, Barcelona, Spain (Online), December 8--13, 2020. International Committee on Computational Linguistics, 77--87."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.1027"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.369"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657838"},{"key":"e_1_3_2_1_27_1","volume-title":"Multimodal Relation Extraction with Efficient Graph Alignment. In MM '21: ACM Multimedia Conference","author":"Zheng Changmeng","year":"2021","unstructured":"Changmeng Zheng, Junhao Feng, Ze Fu, Yi Cai, Qing Li, and Tao Wang. 2021. Multimodal Relation Extraction with Efficient Graph Alignment. In MM '21: ACM Multimedia Conference, Virtual Event, China, October 20 - 24, 2021. ACM, 5298--5306."},{"key":"e_1_3_2_1_28_1","volume-title":"A Comprehensive Survey on Automatic Knowledge Graph Construction. CoRR","author":"Zhong Lingfeng","year":"2023","unstructured":"Lingfeng Zhong, Jia Wu, Qian Li, Hao Peng, and Xindong Wu. 2023. A Comprehensive Survey on Automatic Knowledge Graph Construction. CoRR, Vol. abs\/2302.05019 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING","author":"Zhou Baohang","year":"2024","unstructured":"Baohang Zhou, Ying Zhang, Kehui Song, Hongru Wang, Yu Zhao, Xuhui Sui, and Xiaojie Yuan. 2024. MCIL: Multimodal Counterfactual Instance Learning for Low-resource Entity-based Multimodal Information Extraction. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024). ELRA and ICCL, Torino, Italia, 11101--11110."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-2034"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3224228"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714832","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714832","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:42Z","timestamp":1750295922000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714832"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":31,"alternative-id":["10.1145\/3696410.3714832","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714832","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}