{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:55:15Z","timestamp":1781538915140,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62376182"],"award-info":[{"award-number":["62376182"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810577","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1184-1193","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["RMPL: Relation-aware Multi-task Progressive Learning with Stage-wise Training for Multimedia Event Extraction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-7300-1351","authenticated-orcid":false,"given":"Yongkang","family":"Jin","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Soochow University, Suzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4276-7561","authenticated-orcid":false,"given":"Jianwen","family":"Luo","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Soochow University, Suzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3619-1525","authenticated-orcid":false,"given":"Jingjing","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Soochow University, Suzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4747-293X","authenticated-orcid":false,"given":"Jianmin","family":"Yao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Soochow University, Suzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0606-3718","authenticated-orcid":false,"given":"Yu","family":"Hong","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Soochow University, Suzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Shuai Bai Yuxuan Cai Ruizhe Chen Keqin Chen Xionghui Chen Zesen Cheng Lianghao Deng Wei Ding Chang Gao Chunjiang Ge Wenbin Ge Zhifang Guo Qidong Huang Jie Huang Fei Huang Binyuan Hui Shutong Jiang Zhaohai Li Mingsheng Li Mei Li Kaixin Li Zicheng Lin Junyang Lin Xuejing Liu Jiawei Liu Chenglong Liu Yang Liu Dayiheng Liu Shixuan Liu Dunjie Lu Ruilin Luo Chenxu Lv Rui Men Lingchen Meng Xuancheng Ren Xingzhang Ren Sibo Song Yuchong Sun Jun Tang Jianhong Tu Jianqiang Wan Peng Wang Pengfei Wang Qiuyue Wang Yuxuan Wang Tianbao Xie Yiheng Xu Haiyang Xu Jin Xu Zhibo Yang Mingkun Yang Jianxin Yang An Yang Bowen Yu Fei Zhang Hang Zhang Xi Zhang Bo Zheng Humen Zhong Jingren Zhou Fan Zhou Jing Zhou Yuanzhi Zhu and Ke Zhu. 2025. Qwen3-VL Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2511.21631\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2511.21631"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i11.33246"},{"key":"e_1_3_3_2_4_2","unstructured":"Hyung\u00a0Won Chung Le Hou Shayne Longpre Barret Zoph Yi Tay William Fedus Yunxuan Li Xuezhi Wang Mostafa Dehghani Siddhartha Brahma Albert Webson Shixiang\u00a0Shane Gu Zhuyun Dai Mirac Suzgun Xinyun Chen Aakanksha Chowdhery Alex Castro-Ros Marie Pellat Kevin Robinson Dasha Valter Sharan Narang Gaurav Mishra Adams Yu Vincent Zhao Yanping Huang Andrew Dai Hongkun Yu Slav Petrov Ed\u00a0H. Chi Jeff Dean Jacob Devlin Adam Roberts Denny Zhou Quoc\u00a0V. Le and Jason Wei. 2022. Scaling Instruction-Finetuned Language Models. arxiv:https:\/\/arXiv.org\/abs\/2210.11416\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2210.11416"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.63317\/4o6ysc5sicba"},{"key":"e_1_3_3_2_7_2","volume-title":"9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612526"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.1588"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3652583.3658076"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01593"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.230"},{"key":"e_1_3_3_2_13_2","unstructured":"Zhongqiu Li Shiquan Wang Ruiyu Fang Mengjiao Bao Zhenhe Wu Shuangyong Song Yongxiang Li and Zhongjiang He. 2025. MR-UIE: Multi-Perspective Reasoning with Reinforcement Learning for Universal Information Extraction. arxiv:https:\/\/arXiv.org\/abs\/2509.09082\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2509.09082"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-emnlp.419"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548132"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","unstructured":"Yang Liu Fang Liu Licheng Jiao Qianyue Bao Long Sun Shuo Li Lingling Li and Xu Liu. 2024. Multi-Grained Gradual Inference Model for Multimedia Event Extraction. IEEE Transactions on Circuits and Systems for Video Technology 34 10 (2024) 10507\u201310520. 10.1109\/TCSVT.2024.3402242","DOI":"10.1109\/TCSVT.2024.3402242"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.395"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-emnlp.421"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_19"},{"key":"e_1_3_3_2_21_2","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arxiv:https:\/\/arXiv.org\/abs\/2103.00020\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"e_1_3_3_2_22_2","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter\u00a0J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21 140 (2020) 1\u201367. http:\/\/jmlr.org\/papers\/v21\/20-074.html"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.381"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.677"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29873"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ADICS58448.2024.10533619"},{"key":"e_1_3_3_2_27_2","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Yang Fan Kai Dang Mengfei Du Xuancheng Ren Rui Men Dayiheng Liu Chang Zhou Jingren Zhou and Junyang Lin. 2024. Qwen2-VL: Enhancing Vision-Language Model\u2019s Perception of the World at Any Resolution. arxiv:https:\/\/arXiv.org\/abs\/2409.12191\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2409.12191"},{"key":"e_1_3_3_2_28_2","unstructured":"Weiyun Wang Zhangwei Gao Lixin Gu Hengjun Pu Long Cui Xingguang Wei Zhaoyang Liu Linglin Jing Shenglong Ye Jie Shao Zhaokai Wang Zhe Chen Hongjie Zhang Ganlin Yang Haomin Wang Qi Wei Jinhui Yin Wenhao Li Erfei Cui Guanzhou Chen Zichen Ding Changyao Tian Zhenyu Wu Jingjing Xie Zehao Li Bowen Yang Yuchen Duan Xuehui Wang Zhi Hou Haoran Hao Tianyi Zhang Songze Li Xiangyu Zhao Haodong Duan Nianchen Deng Bin Fu Yinan He Yi Wang Conghui He Botian Shi Junjun He Yingtong Xiong Han Lv Lijun Wu Wenqi Shao Kaipeng Zhang Huipeng Deng Biqing Qi Jiaye Ge Qipeng Guo Wenwei Zhang Songyang Zhang Maosong Cao Junyao Lin Kexian Tang Jianfei Gao Haian Huang Yuzhe Gu Chengqi Lyu Huanze Tang Rui Wang Haijun Lv Wanli Ouyang Limin Wang Min Dou Xizhou Zhu Tong Lu Dahua Lin Jifeng Dai Weijie Su Bowen Zhou Kai Chen Yu Qiao Wenhai Wang and Gen Luo. 2025. InternVL3.5: Advancing Open-Source Multimodal Models in Versatility Reasoning and Efficiency. arxiv:https:\/\/arXiv.org\/abs\/2508.18265\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2508.18265"},{"key":"e_1_3_3_2_29_2","unstructured":"Xiao Wang Wei Zhou Can Zu Han Xia Tianze Chen Yuan Zhang Rui Zheng Junjie Ye Qi Zhang Tao Gui Jihua Kang J. Yang Siyuan Li and Chunsai Du. 2023. InstructUIE: Multi-task Instruction Tuning for Unified Information Extraction. ArXiv abs\/2304.08085 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258179792"},{"key":"e_1_3_3_2_30_2","first-page":"734","volume-title":"Proceedings of the 18th International Natural Language Generation Conference","author":"Xing Fuyu","year":"2025","unstructured":"Fuyu Xing, Zimu Wang, Wei Wang, and Haiyang Zhang. 2025. Benchmarking and Improving LVLMs on Event Extraction from Multimedia Documents. In Proceedings of the 18th International Natural Language Generation Conference. Association for Computational Linguistics, Hanoi, Vietnam, 734\u2013742. https:\/\/aclanthology.org\/2025.inlg-main.42\/"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.309"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.597"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.emnlp-main.205"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICME59968.2025.11210082"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.1298"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Changmeng Zheng Junhao Feng Ze Fu Yiru Cai Qing Li and Tao Wang. 2021. Multimodal Relation Extraction with Efficient Graph Alignment. Proceedings of the 29th ACM International Conference on Multimedia (2021). https:\/\/api.semanticscholar.org\/CorpusID:239011558","DOI":"10.1145\/3474085.3476968"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3476968"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:15:21Z","timestamp":1781536521000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810577"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":36,"alternative-id":["10.1145\/3805622.3810577","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810577","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}