{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:10:27Z","timestamp":1765545027957,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.1145\/3672608.3707934","type":"proceedings-article","created":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T18:30:17Z","timestamp":1747247417000},"page":"767-774","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["X3A: Efficient Multimodal Deepfake Detection with Score-Level Fusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9511-9309","authenticated-orcid":false,"given":"Chan","family":"Park","sequence":"first","affiliation":[{"name":"Sungkyunkwan University, Suwon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0008-0727","authenticated-orcid":false,"given":"Bohyun","family":"Moon","sequence":"additional","affiliation":[{"name":"Sungkyunkwan University, Suwon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8519-4979","authenticated-orcid":false,"given":"Minsun","family":"Jeon","sequence":"additional","affiliation":[{"name":"Sungkyunkwan University, Suwon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0505-2988","authenticated-orcid":false,"given":"Jee-weon","family":"Jung","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8983-1542","authenticated-orcid":false,"given":"Simon S.","family":"Woo","sequence":"additional","affiliation":[{"name":"Sungkyunkwan University, Suwon, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2025,5,14]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2024. Deepfake Video of Zelensky's Wife Raises Concerns. https:\/\/edition.cnn.com\/2024\/07\/02\/europe\/deepfake-video-zelensky-wife-intl-latam\/index.html Accessed: 2024-10-03."},{"key":"e_1_3_2_1_2_1","unstructured":"2024. Furious Ducky Bhai Offers Rs1 Million for Original Content That Led to Wife's AI-Generated Video. https:\/\/tribune.com.pk\/story\/2464434\/furious-ducky-bhai-offers-rs1-million-for-original-content-that-led-to-wifes-ai-generated-video Accessed: 2024-10-03."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/WIFS.2018.8630761"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00383"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00152"},{"key":"e_1_3_2_1_6_1","volume-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Yuhao Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33 (2020), 12449\u201312460."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3595353.3595885"},{"key":"e_1_3_2_1_8_1","volume-title":"Munawar Hayat, Abhinav Dhall, and Kalin Stefanov.","author":"Cai Zhixi","year":"2023","unstructured":"Zhixi Cai, Shreya Ghosh, Aman Pankaj Adatia, Munawar Hayat, Abhinav Dhall, and Kalin Stefanov. 2023. AV-Deepfake 1M: A large-scale LLM-driven audio-visual deepfake dataset. arXiv preprint arXiv:2311.15308 (2023)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2023.103818"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/DICTA56598.2022.10034605"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00408"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00408"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01815"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413700"},{"key":"e_1_3_2_1_16_1","volume-title":"Deepfake detection using spatiotemporal convolutional networks. arXiv preprint arXiv:2006.14749","author":"Lima Oscar De","year":"2020","unstructured":"Oscar De Lima, Sean Franklin, Shreshtha Basu, Blake Karwoski, and Annet George. 2020. Deepfake detection using spatiotemporal convolutional networks. arXiv preprint arXiv:2006.14749 (2020)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2867747"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01011"},{"key":"e_1_3_2_1_20_1","volume-title":"International conference on machine learning. PMLR, 3247\u20133258","author":"Frank Joel","year":"2020","unstructured":"Joel Frank, Thorsten Eisenhofer, Lea Sch\u00f6nherr, Asja Fischer, Dorothea Kolossa, and Thorsten Holz. 2020. Leveraging frequency analysis for deep fake image recognition. In International conference on machine learning. PMLR, 3247\u20133258."},{"key":"e_1_3_2_1_21_1","volume-title":"Spatio-temporal features for generalized detection of deepfake videos. arXiv preprint arXiv:2010.11844","author":"Ganiyusufoglu Ipek","year":"2020","unstructured":"Ipek Ganiyusufoglu, L Minh Ng\u00f4, Nedko Savov, Sezer Karaoglu, and Theo Gevers. 2020. Spatio-temporal features for generalized detection of deepfake videos. arXiv preprint arXiv:2010.11844 (2020)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19955"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/AVSS.2018.8639163"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01453"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01453"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3089437"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.179"},{"key":"e_1_3_2_1_28_1","volume-title":"Rawnet: Advanced end-to-end deep neural network using raw waveforms for text-independent speaker verification. arXiv preprint arXiv:1904.08104","author":"Heo Hee-Soo","year":"2019","unstructured":"Jee-weon Jung, Hee-Soo Heo, Ju-ho Kim, Hye-jin Shim, and Ha-Jin Yu. 2019. Rawnet: Advanced end-to-end deep neural network using raw waveforms for text-independent speaker verification. arXiv preprint arXiv:1904.08104 (2019)."},{"key":"e_1_3_2_1_29_1","volume-title":"ICASSP 2022-2022 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, 6367\u20136371","author":"Heo Hee-Soo","year":"2022","unstructured":"Jee-weon Jung, Hee-Soo Heo, Hemlata Tak, Hye-jin Shim, Joon Son Chung, Bong-Jin Lee, Ha-Jin Yu, and Nicholas Evans. 2022. Aasist: Audio anti-spoofing using integrated spectro-temporal graph attention networks. In ICASSP 2022-2022 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, 6367\u20136371."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA58977.2023.00207"},{"key":"e_1_3_2_1_31_1","volume-title":"Contextual Cross-Modal Attention for Audio-Visual Deepfake Detection and Localization. arXiv preprint arXiv:2408.01532","author":"Katamneni Vinaya Sree","year":"2024","unstructured":"Vinaya Sree Katamneni and Ajita Rattani. 2024. Contextual Cross-Modal Attention for Audio-Visual Deepfake Detection and Localization. arXiv preprint arXiv:2408.01532 (2024)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-024-10810-6"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3476099.3484315"},{"key":"e_1_3_2_1_34_1","volume-title":"FakeAVCeleb: A novel audio-video multimodal deepfake dataset. arXiv preprint arXiv:2108.05080","author":"Khalid Hasam","year":"2021","unstructured":"Hasam Khalid, Shahroz Tariq, Minha Kim, and Simon S Woo. 2021. FakeAVCeleb: A novel audio-video multimodal deepfake dataset. arXiv preprint arXiv:2108.05080 (2021)."},{"key":"e_1_3_2_1_35_1","volume-title":"Why South Korea is on high alert over deepfake sex crimes","author":"Kim Hyunsu","year":"2024","unstructured":"Hyunsu Kim. 2024. Why South Korea is on high alert over deepfake sex crimes. Reuters (30 August 2024). https:\/\/www.reuters.com\/world\/asia-pacific\/why-south-korea-is-high-alert-over-deepfake-sex-crimes-2024-08-30\/ Accessed on [2024-10-03]."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/WIFS.2018.8630787"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606775"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803661"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/URTC60662.2023.10534969"},{"key":"e_1_3_2_1_40_1","volume-title":"Deepfake generation and detection: A benchmark and survey. arXiv preprint arXiv:2403.17881","author":"Pei Gan","year":"2024","unstructured":"Gan Pei, Jiangning Zhang, Menghan Hu, Guangtao Zhai, Chengjie Wang, Zhenyu Zhang, Jian Yang, Chunhua Shen, and Dacheng Tao. 2024. Deepfake generation and detection: A benchmark and survey. arXiv preprint arXiv:2403.17881 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 993\u20131000","author":"Raza Muhammad Anas","year":"2023","unstructured":"Muhammad Anas Raza and Khalid Mahmood Malik. 2023. Multimodaltrace: Deepfake detection using audiovisual representation learning. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 993\u20131000."},{"key":"e_1_3_2_1_42_1","first-page":"80","article-title":"Recurrent convolutional strategies for face manipulation detection in videos","volume":"3","author":"Sabir Ekraam","year":"2019","unstructured":"Ekraam Sabir, Jiaxin Cheng, Ayush Jaiswal, Wael AbdAlmageed, Iacopo Masi, and Prem Natarajan. 2019. Recurrent convolutional strategies for face manipulation detection in videos. Interfaces (GUI) 3, 1 (2019), 80\u201387.","journal-title":"Interfaces (GUI)"},{"volume-title":"A hybrid CNN-LSTM model for video deepfake detection by leveraging optical flow features. In 2022 international joint conference on neural networks (IJCNN)","author":"Saikia Pallabi","key":"e_1_3_2_1_43_1","unstructured":"Pallabi Saikia, Dhwani Dholaria, Priyanka Yadav, Vaidehi Patel, and Mohendra Roy. 2022. A hybrid CNN-LSTM model for video deepfake detection by leveraging optical flow features. In 2022 international joint conference on neural networks (IJCNN). IEEE, 1\u20137."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPAASC55919.2022.9980296"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01816"},{"key":"e_1_3_2_1_46_1","unstructured":"Yadvender Singh. 2024. MMTFD: A multimodal detector for temporal forgeries detection. Ph.D. Dissertation. Faculty of the Graduate School of the University at Buffalo The State ...."},{"key":"e_1_3_2_1_47_1","volume-title":"End-to-end spectro-temporal graph attention networks for speaker verification anti-spoofing and speech deepfake detection. arXiv preprint arXiv:2107.12710","author":"Tak Hemlata","year":"2021","unstructured":"Hemlata Tak, Jee-weon Jung, Jose Patino, Madhu Kamble, Massimiliano Todisco, and Nicholas Evans. 2021. End-to-end spectro-temporal graph attention networks for speaker verification anti-spoofing and speech deepfake detection. arXiv preprint arXiv:2107.12710 (2021)."},{"key":"e_1_3_2_1_48_1","volume-title":"Automatic speaker verification spoofing and deepfake detection using wav2vec 2.0 and data augmentation. arXiv preprint arXiv:2202.12233","author":"Tak Hemlata","year":"2022","unstructured":"Hemlata Tak, Massimiliano Todisco, Xin Wang, Jee-weon Jung, Junichi Yamagishi, and Nicholas Evans. 2022. Automatic speaker verification spoofing and deepfake detection using wav2vec 2.0 and data augmentation. arXiv preprint arXiv:2202.12233 (2022)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01165"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_51_1","volume-title":"Hye jin Shim, Ju ho Kim, and Ha-Jin Yu.","author":"Jung Jee","year":"2020","unstructured":"Jee weon Jung, Seung bin Kim, Hye jin Shim, Ju ho Kim, and Ha-Jin Yu. 2020. Improved RawNet with Feature Map Scaling for Text-independent Speaker Verification using Raw Waveforms. arXiv:2004.00526 [eess.AS] https:\/\/arxiv.org\/abs\/2004.00526"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02128-1"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3233236"},{"key":"e_1_3_2_1_54_1","volume-title":"An initial investigation for detecting partially spoofed audio. arXiv preprint arXiv:2104.02518","author":"Zhang Lin","year":"2021","unstructured":"Lin Zhang, Xin Wang, Erica Cooper, Junichi Yamagishi, Jose Patino, and Nicholas Evans. 2021. An initial investigation for detecting partially spoofed audio. arXiv preprint arXiv:2104.02518 (2021)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01477"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01453"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20065-6_23"}],"event":{"name":"SAC '25: 40th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Catania International Airport Catania Italy","acronym":"SAC '25"},"container-title":["Proceedings of the 40th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707934","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3672608.3707934","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:36Z","timestamp":1750298256000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707934"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":57,"alternative-id":["10.1145\/3672608.3707934","10.1145\/3672608"],"URL":"https:\/\/doi.org\/10.1145\/3672608.3707934","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]},"assertion":[{"value":"2025-05-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}