{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:52:49Z","timestamp":1764550369217,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","funder":[{"name":"the Fundamental Research Funds for the Central Universities","award":["3282024049"],"award-info":[{"award-number":["3282024049"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746265.3759665","type":"proceedings-article","created":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T22:20:29Z","timestamp":1759962029000},"page":"110-117","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["VAD-Lip: Visual and Audio Deepfake Detection via Lip Features"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0974-3191","authenticated-orcid":false,"given":"JinYu","family":"Wang","sequence":"first","affiliation":[{"name":"Beijing Electronic Science and Technology Institute, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3873-1653","authenticated-orcid":false,"given":"Xin","family":"Jin","sequence":"additional","affiliation":[{"name":"Cyber Security, Beijing Electronic Science and Technology Institute, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0348-1893","authenticated-orcid":false,"given":"Huaye","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Electronic Science and Technology Institute, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5434-9573","authenticated-orcid":false,"given":"Longteng","family":"Jiang","sequence":"additional","affiliation":[{"name":"Beijing Electronic Science and Technology Institute, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/WIFS.2018.8630761"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIRCA51532.2021.9544734"},{"key":"e_1_3_2_1_3_1","volume-title":"Lipnet: End-to-end sentence-level lipreading. arXiv preprint arXiv:1611.01599","author":"Assael Yannis M","year":"2016","unstructured":"Yannis M Assael, Brendan Shillingford, Shimon Whiteson, and Nando De Freitas. 2016. Lipnet: End-to-end sentence-level lipreading. arXiv preprint arXiv:1611.01599 (2016)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/DICTA56598.2022.10034605"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i3.32241"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.195"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413700"},{"key":"e_1_3_2_1_8_1","volume-title":"Predictions of subjective ratings and spoofing assessments of voice conversion challenge 2020 submissions. arXiv preprint arXiv:2009.03554","author":"Das Rohan Kumar","year":"2020","unstructured":"Rohan Kumar Das, Tomi Kinnunen, Wen-Chin Huang, Zhenhua Ling, Junichi Yamagishi, Yi Zhao, Xiaohai Tian, and Tomoki Toda. 2020. Predictions of subjective ratings and spoofing assessments of voice conversion challenge 2020 submissions. arXiv preprint arXiv:2009.03554 (2020)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00525"},{"key":"e_1_3_2_1_10_1","volume-title":"The deepfake detection challenge (dfdc) dataset. arXiv preprint arXiv:2006.07397","author":"Dolhansky Brian","year":"2020","unstructured":"Brian Dolhansky, Joanna Bitton, Ben Pflaum, Jikuo Lu, Russ Howes, Menglin Wang, and Cristian Canton Ferrer. 2020. The deepfake detection challenge (dfdc) dataset. arXiv preprint arXiv:2006.07397 (2020)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00434"},{"key":"e_1_3_2_1_12_1","volume-title":"EfficientNet-Based Deepfake Detection: A Robust Approach for Real and Fake Media Classification. In 2024 Global Conference on Communications and Information Technologies (GCCIT). IEEE, 1-6.","author":"Jain Eshika","year":"2024","unstructured":"Eshika Jain and Danish Kundra. 2024. EfficientNet-Based Deepfake Detection: A Robust Approach for Real and Fake Media Classification. In 2024 Global Conference on Communications and Information Technologies (GCCIT). IEEE, 1-6."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00296"},{"key":"e_1_3_2_1_14_1","volume-title":"FakeAVCeleb: A novel audio-video multimodal deepfake dataset. arXiv preprint arXiv:2108.05080","author":"Khalid Hasam","year":"2021","unstructured":"Hasam Khalid, Shahroz Tariq, Minha Kim, and Simon S Woo. 2021. FakeAVCeleb: A novel audio-video multimodal deepfake dataset. arXiv preprint arXiv:2108.05080 (2021)."},{"volume-title":"Convolutional neural networks with swift for tensorflow: image recognition and dataset categorization","author":"Koonce Brett","key":"e_1_3_2_1_15_1","unstructured":"Brett Koonce. 2021. ResNet 34. In Convolutional neural networks with swift for tensorflow: image recognition and dataset categorization. Springer, 51-61."},{"key":"e_1_3_2_1_16_1","volume-title":"Deepfakes: a new threat to face recognition? assessment and detection. arXiv preprint arXiv:1812.08685","author":"Korshunov Pavel","year":"2018","unstructured":"Pavel Korshunov and S\u00e9bastien Marcel. 2018. Deepfakes: a new threat to face recognition? assessment and detection. arXiv preprint arXiv:1812.08685 (2018)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00327"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413570"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413532"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00009"},{"key":"e_1_3_2_1_21_1","volume-title":"Traitement du Signal","volume":"40","author":"Saxena Akash","year":"2023","unstructured":"Akash Saxena, Dharmendra Yadav, Manish Gupta, Sunil Phulre, Tripti Arjariya, Varshali Jaiswal, and Rakesh Kumar Bhujade. 2023. Detecting Deepfakes: A Novel Framework Employing XceptionNet-Based Convolutional Neural Networks. Traitement du Signal, Vol. 40, 3 (2023)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.367"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2017.2761539"},{"key":"e_1_3_2_1_24_1","volume-title":"Tacotron: Towards end-to-end speech synthesis. arXiv preprint arXiv:1703.10135","author":"Wang Yuxuan","year":"2017","unstructured":"Yuxuan Wang, RJ Skerry-Ryan, Daisy Stanton, Yonghui Wu, Ron J Weiss, Navdeep Jaitly, Zongheng Yang, Ying Xiao, Zhifeng Chen, Samy Bengio, et al., 2017. Tacotron: Towards end-to-end speech synthesis. arXiv preprint arXiv:1703.10135 (2017)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12234"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2023.3262148"},{"key":"e_1_3_2_1_27_1","first-page":"2","article-title":"Multi-task learning for audio-visual active speaker detection","volume":"4","author":"Zhang Yuan-Hang","year":"2019","unstructured":"Yuan-Hang Zhang, Jingyun Xiao, Shuang Yang, and Shiguang Shan. 2019. Multi-task learning for audio-visual active speaker detection. The ActivityNet Large-Scale Activity Recognition Challenge, Vol. 4 (2019), 2.","journal-title":"The ActivityNet Large-Scale Activity Recognition Challenge"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.229"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01453"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413769"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 1st on Deepfake Forensics Workshop: Detection, Attribution, Recognition, and Adversarial Challenges in the Era of AI-Generated Media"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746265.3759665","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:51:07Z","timestamp":1764550267000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746265.3759665"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":30,"alternative-id":["10.1145\/3746265.3759665","10.1145\/3746265"],"URL":"https:\/\/doi.org\/10.1145\/3746265.3759665","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}