{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:16:12Z","timestamp":1765307772167,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3758191","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:44:48Z","timestamp":1761371088000},"page":"12571-12577","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A New Dataset and Benchmark for Grounding Multimodal Misinformation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-0036-2736","authenticated-orcid":false,"given":"Bingjian","family":"Yang","sequence":"first","affiliation":[{"name":"National Engineering Research Center for Multimedia Software, School of Computer Science, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9482-0111","authenticated-orcid":false,"given":"Danni","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1433-1108","authenticated-orcid":false,"given":"Kaipeng","family":"Niu","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Multimedia Software, School of Computer Science, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4417-6628","authenticated-orcid":false,"given":"Wenxuan","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China and State Key Laboratory for Multimedia Information Processing, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3846-9157","authenticated-orcid":false,"given":"Zheng","family":"Wang","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Multimedia Software, School of Computer Science, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4846-2015","authenticated-orcid":false,"given":"Mohan","family":"Kankanhalli","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Norris","author":"Blanchar John C.","year":"2024","unstructured":"John C. Blanchar and Catherine J. Norris. 2024. Trump, Twitter, and Truth Judgments: The Effects of ''Disputed'' Tags and Political Knowledge on the Judged Truthfulness of Election Misinformation. HKS Misinformation Review (September 2024). https:\/\/misinforeview.hks.harvard.edu\/article\/trump-twitter-and-truth-judgments-the-effects-of-disputed-tags-and-political-knowledge-on-the-judged-truthfulness-of-election-misinformation\/"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680663"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680795"},{"key":"e_1_3_2_1_5_1","unstructured":"Lizhi Chen Zhong Qian Peifeng Li and Qiaoming Zhu. 2025. Multimodal Fake News Video Explanation: Dataset Analysis and Evaluation. arXiv:2501.08514 [cs.CV] https:\/\/arxiv.org\/abs\/2501.08514"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 24185-24198","author":"Chen Zhe","year":"2024","unstructured":"Zhe Chen, Jiannan Wu, Wenhai Wang, Weijie Su, Guo Chen, Sen Xing, Muyan Zhong, Qinglong Zhang, Xizhou Zhu, Lewei Lu, et al., 2024. Internvl: Scaling up vision foundation models and aligning for generic visual-linguistic tasks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 24185-24198."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00939"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i27.35048"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2104235118"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i12.26689"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i12.26689"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01240"},{"key":"e_1_3_2_1_13_1","unstructured":"Nikhila Ravi Valentin Gabeur Yuan-Ting Hu Ronghang Hu Chaitanya Ryali Tengyu Ma Haitham Khedr Roman R\u00e4dle Chloe Rolland Laura Gustafson et al. 2024. Sam 2: Segment anything in images and videos. arXiv preprint arXiv:2408.00714 (2024)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2025.3533919"},{"key":"e_1_3_2_1_15_1","volume-title":"Detecting and Grounding Multi-Modal Media Manipulation. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Shao Rui","year":"2023","unstructured":"Rui Shao, Tianxing Wu, and Ziwei Liu. 2023. Detecting and Grounding Multi-Modal Media Manipulation. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3367749"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3685517"},{"key":"e_1_3_2_1_18_1","volume-title":"Deepfakes and disinformation: Exploring the impact of synthetic political video on deception, uncertainty, and trust in news. Social media society","author":"Vaccari Cristian","year":"2020","unstructured":"Cristian Vaccari and Andrew Chadwick. 2020. Deepfakes and disinformation: Exploring the impact of synthetic political video on deception, uncertainty, and trust in news. Social media society, Vol. 6, 1 (2020), 2056305120903408."},{"key":"e_1_3_2_1_19_1","unstructured":"Yihao Wang Lizhi Chen Zhong Qian and Peifeng Li. 2024. Official-NV: An LLM-Generated News Video Dataset for Multimodal Fake News Detection. arXiv preprint arXiv:2407.19493 (2024)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.22215\/timreview\/1282"},{"key":"e_1_3_2_1_21_1","first-page":"2901","volume-title":"FIRE: Fact-checking with Iterative Retrieval and Verification. In Findings of the Association for Computational Linguistics: NAACL 2025","author":"Xie Zhuohan","year":"2025","unstructured":"Zhuohan Xie, Rui Xing, Yuxia Wang, Jiahui Geng, Hasan Iqbal, Dhruv Sahnan, Iryna Gurevych, and Preslav Nakov. 2025. FIRE: Fact-checking with Iterative Retrieval and Verification. In Findings of the Association for Computational Linguistics: NAACL 2025, Luis Chiruzzo, Alan Ritter, and Lu Wang (Eds.). Association for Computational Linguistics, Albuquerque, New Mexico, 2901-2914. https:\/\/aclanthology.org\/2025.findings-naacl.158\/"},{"key":"e_1_3_2_1_22_1","unstructured":"Kaiying Yan Moyang Liu Yukun Liu Ruibo Fu Zhengqi Wen Jianhua Tao Xuefei Liu and Guanjun Li. 2025. MTPareto: A MultiModal Targeted Pareto Framework for Fake News Detection. arXiv:2501.06764 [cs.LG] https:\/\/arxiv.org\/abs\/2501.06764"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591879"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01068"},{"key":"e_1_3_2_1_25_1","volume-title":"VMID: A Multimodal Fusion LLM Framework for Detecting and Identifying Misinformation of Short Videos. arXiv:2411.10032 [cs.CV] https:\/\/arxiv.org\/abs\/2411.10032","author":"Zhong Weihao","year":"2024","unstructured":"Weihao Zhong, Yinhao Xiao, Minghui Xu, and Xiuzhen Cheng. 2024. VMID: A Multimodal Fusion LLM Framework for Detecting and Identifying Misinformation of Short Videos. arXiv:2411.10032 [cs.CV] https:\/\/arxiv.org\/abs\/2411.10032"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3758191","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:13:55Z","timestamp":1765307635000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3758191"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":25,"alternative-id":["10.1145\/3746027.3758191","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3758191","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}