{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T05:27:05Z","timestamp":1781587625035,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Major Key Project of PCL","award":["PCL2023A05"],"award-info":[{"award-number":["PCL2023A05"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100017052","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23B2022, U22A2030"],"award-info":[{"award-number":["U23B2022, U22A2030"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100017052","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong Major Project of Basic and Applied Basic Research","award":["2023B0303000010"],"award-info":[{"award-number":["2023B0303000010"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681672","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"6297-6306","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["FRADE: Forgery-aware Audio-distilled Multimodal Learning for Deepfake Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6867-498X","authenticated-orcid":false,"given":"Fan","family":"Nie","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-Sen University &amp; Pengcheng Laboratory, Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7520-9031","authenticated-orcid":false,"given":"Jiangqun","family":"Ni","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Technology, Sun Yat-Sen University &amp; Pengcheng Laboratory, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9632-3785","authenticated-orcid":false,"given":"Jian","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-Sen University, Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5012-7151","authenticated-orcid":false,"given":"Bin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Pengcheng Laboratory, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4783-876X","authenticated-orcid":false,"given":"Weizhe","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Cyberspace Science, Harbin Institute of Technology &amp; Pengcheng Laboratory, Harbin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V35I2.16193"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413700"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.21437\/INTERSPEECH.2020--2650"},{"key":"e_1_3_2_1_5_1","volume-title":"The deepfake detection challenge (dfdc) dataset. arXiv preprint arXiv:2006.07397","author":"Dolhansky Brian","year":"2020","unstructured":"Brian Dolhansky, Joanna Bitton, Ben Pflaum, Jikuo Lu, Russ Howes, Menglin Wang, and Cristian Canton Ferrer. 2020. The deepfake detection challenge (dfdc) dataset. arXiv preprint arXiv:2006.07397 (2020)."},{"key":"e_1_3_2_1_6_1","volume-title":"9th International Conference on Learning Representations, ICLR. https:\/\/openreview.net\/forum?id=YicbFdNTTy","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In 9th International Conference on Learning Representations, ICLR. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01963"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01011"},{"key":"e_1_3_2_1_9_1","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems NeurIPS.","author":"Grill Jean-Bastien","year":"2020","unstructured":"Jean-Bastien Grill, Florian Strub, Florent Altch\u00e9, Corentin Tallec, Pierre H. Richemond, Elena Buchatskaya, Carl Doersch, Bernardo \u00c1vila Pires, Zhaohan Guo, Mohammad Gheshlaghi Azar, Bilal Piot, Koray Kavukcuoglu, R\u00e9mi Munos, and Michal Valko. 2020. Bootstrap Your Own Latent - A New Approach to Self-Supervised Learning. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems NeurIPS."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V36I1.19955"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.24963\/IJCAI.2022"},{"key":"e_1_3_2_1_12_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems.","author":"Guan Jiazhi","year":"2022","unstructured":"Jiazhi Guan, Hang Zhou, Zhibin Hong, Errui Ding, Jingdong Wang, Chengbin Quan, and Youjian Zhao. 2022. Delving into Sequential Patches for Deepfake Detection. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01453"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00500"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning, ICML","volume":"97","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin de Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-Efficient Transfer Learning for NLP. In Proceedings of the 36th International Conference on Machine Learning, ICML, Vol. 97. 2790--2799."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00589"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/J.ASOC.2023.110124"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V36I1.19990"},{"key":"e_1_3_2_1_19_1","volume-title":"Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems NeurIPS. 4485--4495","author":"Jia Ye","year":"2018","unstructured":"Ye Jia, Yu Zhang, Ron J. Weiss, Quan Wang, Jonathan Shen, Fei Ren, Zhifeng Chen, Patrick Nguyen, Ruoming Pang, Ignacio L\u00f3pez-Moreno, and Yonghui Wu. 2018. Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis. In Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems NeurIPS. 4485--4495."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Jee-weon Jung Hee-Soo Heo Ju-ho Kim Hye-jin Shim and Ha-Jin Yu. 2019. RawNet: Advanced End-to-End Deep Neural Network Using Raw Waveforms for Text-Independent Speaker Verification. In Annual Conference of the International Speech Communication Association Interspeech. 1268--1272. https:\/\/doi.org\/10.21437\/INTERSPEECH.2019--1982","DOI":"10.21437\/INTERSPEECH.2019--1982"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747766"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"Jee-weon Jung Seung-bin Kim Hye-jin Shim Ju-ho Kim and Ha-Jin Yu. 2020. Improved RawNet with Feature Map Scaling for Text-Independent Speaker Verification Using Raw Waveforms. In Annual Conference of the International Speech Communication Association Interspeech. 1496--1500. https:\/\/doi.org\/10.21437\/INTERSPEECH.2020--1011","DOI":"10.21437\/INTERSPEECH.2020--1011"},{"key":"e_1_3_2_1_23_1","volume-title":"Woo","author":"Khalid Hasam","year":"2021","unstructured":"Hasam Khalid, Shahroz Tariq, Minha Kim, and Simon S. Woo. 2021. FakeAVCeleb: A Novel Audio-Video Multimodal Deepfake Dataset. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks."},{"key":"e_1_3_2_1_24_1","volume-title":"Deepfakes: a new threat to face recognition? assessment and detection. arXiv preprint arXiv:1812.08685","author":"Korshunov Pavel","year":"2018","unstructured":"Pavel Korshunov and S\u00e9bastien Marcel. 2018. Deepfakes: a new threat to face recognition? assessment and detection. arXiv preprint arXiv:1812.08685 (2018)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01057"},{"key":"e_1_3_2_1_26_1","volume-title":"Vision Transformer for Small-Size Datasets. CoRR","author":"Lee Seung Hoon","year":"2021","unstructured":"Seung Hoon Lee, Seunghyun Lee, and Byung Cheol Song. 2021. Vision Transformer for Small-Size Datasets. CoRR, Vol. abs\/2112.13492 (2021)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00505"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00228"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00083"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/J.PATCOG.2023.109628"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3326694"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3312738"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_34_1","volume-title":"Forgery-aware adaptive vision transformer for face forgery detection. arXiv preprint arXiv:2309.11092","author":"Luo Anwei","year":"2023","unstructured":"Anwei Luo, Rizhao Cai, Chenqi Kong, Xiangui Kang, Jiwu Huang, and Alex C Kot. 2023. Forgery-aware adaptive vision transformer for face forgery detection. arXiv preprint arXiv:2309.11092 (2023)."},{"key":"e_1_3_2_1_35_1","unstructured":"Kowalski Marek. 2020. FaceSwap. https:\/\/github.com\/MarekKowalski\/FaceSwap"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2022.3233774"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413570"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00728"},{"key":"e_1_3_2_1_39_1","volume-title":"ST-Adapter: Parameter-Efficient Image-to-Video Transfer Learning. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems NeurIPS.","author":"Pan Junting","year":"2022","unstructured":"Junting Pan, Ziyi Lin, Xiatian Zhu, Jing Shao, and Hongsheng Li. 2022. ST-Adapter: Parameter-Efficient Image-to-Video Transfer Learning. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems NeurIPS."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413532"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00009"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01816"},{"key":"e_1_3_2_1_43_1","volume-title":"First Order Motion Model for Image Animation. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems NeurIPS. 7135--7145","author":"Siarohin Aliaksandr","year":"2019","unstructured":"Aliaksandr Siarohin, St\u00e9phane Lathuili\u00e8re, Sergey Tulyakov, Elisa Ricci, and Nicu Sebe. 2019. First Order Motion Model for Image Animation. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems NeurIPS. 7135--7145."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.97"},{"key":"e_1_3_2_1_45_1","volume-title":"TVLT: Textless Vision-Language Transformer. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems NeurIPS.","author":"Tang Zineng","year":"2022","unstructured":"Zineng Tang, Jaemin Cho, Yixin Nie, and Mohit Bansal. 2022. TVLT: Textless Vision-Language Transformer. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems NeurIPS."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00402"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2023.3262148"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3309899"},{"key":"e_1_3_2_1_49_1","volume-title":"Audio-driven talking face video generation with learning-based personalized head pose. arXiv preprint arXiv:2002.10137","author":"Yi Ran","year":"2020","unstructured":"Ran Yi, Zipeng Ye, Juyong Zhang, Hujun Bao, and Yong-Jin Liu. 2020. Audio-driven talking face video generation with learning-based personalized head pose. arXiv preprint arXiv:2002.10137 (2020)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00384"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2016.2603342"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3625100"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00222"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00828"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01477"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01453"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681672","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681672","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:50Z","timestamp":1750295870000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681672"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":56,"alternative-id":["10.1145\/3664647.3681672","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681672","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}