{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T08:57:53Z","timestamp":1776502673047,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62441237, No.62172435"],"award-info":[{"award-number":["No.62441237, No.62172435"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755534","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"11581-11589","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["A Multimodal Deviation Perceiving Framework for Weakly-Supervised Temporal Forgery Localization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-8702-4521","authenticated-orcid":false,"given":"Wenbo","family":"Xu","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2692-5928","authenticated-orcid":false,"given":"Junyan","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4068-1766","authenticated-orcid":false,"given":"Wei","family":"Lu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3225-4649","authenticated-orcid":false,"given":"Xiangyang","family":"Luo","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Mathematical Engineering and Advanced Computing, Zhengzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8967-8525","authenticated-orcid":false,"given":"Qian","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"wav2vec 2.0: A Framework for Self-supervised Learning of Speech Representations. Advances in neural information processing systems","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Yuhao Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A Framework for Self-supervised Learning of Speech Representations. Advances in neural information processing systems, Vol. 33 (2020), 12449-12460."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680795"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2023.103818"},{"key":"e_1_3_2_1_4_1","volume-title":"Content Driven Audio-Visual Deepfake Dataset and Multimodal Method for Temporal Forgery Localization. In 2022 International Conference on Digital Image Computing: Techniques and Applications. 1-10","author":"Cai Zhixi","year":"2022","unstructured":"Zhixi Cai, Kalin Stefanov, Abhinav Dhall, and Munawar Hayat. 2022. Do You Really Mean That? Content Driven Audio-Visual Deepfake Dataset and Multimodal Method for Temporal Forgery Localization. In 2022 International Conference on Digital Image Computing: Techniques and Applications. 1-10."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3253390"},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Machine Learning. 1597-1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A Simple Framework for Contrastive Learning of Visual Representations. In International Conference on Machine Learning. 1597-1607."},{"key":"e_1_3_2_1_7_1","volume-title":"Adaptive Zone Learning for Weakly Supervised Object Localization","author":"Chen Zhiwei","year":"2024","unstructured":"Zhiwei Chen, Siwei Wang, Liujuan Cao, Yunhang Shen, and Rongrong Ji. 2024. Adaptive Zone Learning for Weakly Supervised Object Localization. IEEE Transactions on Neural Networks and Learning Systems (2024)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413700"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00389"},{"key":"e_1_3_2_1_10_1","volume-title":"Full-Stage Pseudo Label Quality Enhancement for Weakly-supervised Temporal Action Localization. arXiv preprint arXiv:2407.08971","author":"Feng Qianhan","year":"2024","unstructured":"Qianhan Feng, Wenshuo Li, Tong Lin, and Xinghao Chen. 2024. Full-Stage Pseudo Label Quality Enhancement for Weakly-supervised Temporal Action Localization. arXiv preprint arXiv:2407.08971 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. 758-766","author":"Fu Haolong","year":"2024","unstructured":"Haolong Fu, Jin Yuan, Guojin Zhong, Xuan He, Jiacheng Lin, and Zhiyong Li. 2024. CF-Deformable DETR: An End-to-End Alignment-Free Model for Weakly Aligned Visible-Infrared Object Detection. In Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. 758-766."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Zhihao Gu Taiping Yao Yang Chen Ran Yi Shouhong Ding and Lizhuang Ma. 2022. Region-Aware Temporal Inconsistency Learning for DeepFake Video Detection. 920-926.","DOI":"10.24963\/ijcai.2022\/129"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00436"},{"key":"e_1_3_2_1_15_1","volume-title":"Complete Instances Mining for Weakly Supervised Instance Segmentation. arXiv preprint arXiv:2402.07633","author":"Li Zecheng","year":"2024","unstructured":"Zecheng Li, Zening Zeng, Yuqi Liang, and Jin-Gang Yu. 2024. Complete Instances Mining for Weakly Supervised Instance Segmentation. arXiv preprint arXiv:2402.07633 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681537"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3326694"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3233063"},{"key":"e_1_3_2_1_19_1","first-page":"746","volume-title":"Nature","volume":"264","author":"McGurk Harry","year":"1976","unstructured":"Harry McGurk and John MacDonald. 1976. Hearing lips and seeing voices. Nature, Vol. 264, 5588 (1976), 746-748."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681672"},{"key":"e_1_3_2_1_21_1","volume-title":"BYOL for Audio: Self-Supervised Learning for General-Purpose Audio Representation. In 2021 International Joint Conference on Neural Networks. 1-8.","author":"Niizumi Daisuke","year":"2021","unstructured":"Daisuke Niizumi, Daiki Takeuchi, Yasunori Ohishi, Noboru Harada, and Kunio Kashino. 2021. BYOL for Audio: Self-Supervised Learning for General-Purpose Audio Representation. In 2021 International Joint Conference on Neural Networks. 1-8."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3093446"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i1.32054"},{"key":"e_1_3_2_1_24_1","volume-title":"DiRLoc: Disentanglement Representation Learning for Robust Image Forgery Localization","author":"Sheng Ziqi","year":"2024","unstructured":"Ziqi Sheng, Zuomin Qu, Wei Lu, Xiaochun Cao, and Jiwu Huang. 2024. DiRLoc: Disentanglement Representation Learning for Robust Image Forgery Localization. IEEE Transactions on Dependable and Secure Computing (2024)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01808"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01939"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. 1281-1289","author":"Su Hao","year":"2024","unstructured":"Hao Su and Meng Yang. 2024. A Consistency and Integration Model with Adaptive Thresholds for Weakly Supervised Object Localization. In Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. 1281-1289."},{"key":"e_1_3_2_1_28_1","volume-title":"Temporal Action Localization in the Deep Learning Era: A Survey","author":"Wang Binglu","year":"2023","unstructured":"Binglu Wang, Yongqiang Zhao, Le Yang, Teng Long, and Xuelong Li. 2023. Temporal Action Localization in the Deep Learning Era: A Survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2868668"},{"key":"e_1_3_2_1_30_1","volume-title":"Weakly-supervised Audio Temporal Forgery Localization via Progressive Audio-language Co-learning Network. arXiv preprint arXiv:2505.01880","author":"Wu Junyan","year":"2025","unstructured":"Junyan Wu, Wenbo Xu, Wei Lu, Xiangyang Luo, Rui Yang, and Shize Guo. 2025. Weakly-supervised Audio Temporal Forgery Localization via Progressive Audio-language Co-learning Network. arXiv preprint arXiv:2505.01880 (2025)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2024.3361151"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02128-1"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2023.3290752"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28516"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01575"},{"key":"e_1_3_2_1_36_1","volume-title":"ActionFormer: Localizing Moments of Actions with Transformers. In European Conference on Computer Vision. 492-510","author":"Zhang Chen-Lin","year":"2022","unstructured":"Chen-Lin Zhang, Jianxin Wu, and Yin Li. 2022. ActionFormer: Localizing Moments of Actions with Transformers. In European Conference on Computer Vision. 492-510."},{"key":"e_1_3_2_1_37_1","first-page":"5866","article-title":"Weakly Supervised Object Localization and Detection: A Survey","volume":"44","author":"Zhang Dingwen","year":"2021","unstructured":"Dingwen Zhang, Junwei Han, Gong Cheng, and Ming-Hsuan Yang. 2021b. Weakly Supervised Object Localization and Detection: A Survey. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 9 (2021), 5866-5885.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613767"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3688984"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01453"},{"key":"e_1_3_2_1_41_1","volume-title":"A brief introduction to weakly supervised learning. National science review","author":"Zhou Zhi-Hua","year":"2018","unstructured":"Zhi-Hua Zhou. 2018. A brief introduction to weakly supervised learning. National science review, Vol. 5, 1 (2018), 44-53."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110077"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755534","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:21:11Z","timestamp":1765308071000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755534"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":42,"alternative-id":["10.1145\/3746027.3755534","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755534","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}