{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T02:40:36Z","timestamp":1759891236961,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T00:00:00Z","timestamp":1746662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715283","type":"proceedings-article","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T16:09:41Z","timestamp":1748016581000},"page":"725-728","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Explainable Manipulated Videos Detection Using Multimodal Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9385-6101","authenticated-orcid":false,"given":"Khoa-Dang","family":"Tran","sequence":"first","affiliation":[{"name":"University of New South Wales, Sydney, NSW, Australia"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1002\/aaai.12188"},{"key":"e_1_3_2_2_3_1","volume-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E Gonzalez, et al. 2023. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed 14 April 2023) 2, 3 (2023), 6."},{"key":"e_1_3_2_2_4_1","volume-title":"Richard Youngs, and Kate Jones.","author":"Colomina Carme","year":"2021","unstructured":"Carme Colomina, H\u00e9ctor S\u00e1nchez Margalef, Richard Youngs, and Kate Jones. 2021. The impact of disinformation on democratic processes and human rights in the world. Brussels: European Parliament (2021), 1--19."},{"key":"e_1_3_2_2_5_1","volume-title":"d.]. The deepfake detection challenge (dfdc) preview dataset. arXiv","author":"Dolhansky B","year":"2019","unstructured":"B Dolhansky, R Howes, B Pflaum, N Baram, and CC Ferrer. [n. d.]. The deepfake detection challenge (dfdc) preview dataset. arXiv 2019. arXiv preprint arXiv:1910.08854 ([n. d.])."},{"key":"e_1_3_2_2_6_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122946"},{"key":"e_1_3_2_2_8_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730--19742."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645381"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681089"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102103"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.663"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW58289.2023.00071"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-021-11782-3"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01240"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00009"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00048"},{"key":"e_1_3_2_2_18_1","unstructured":"Pranab Sahoo Ayush Kumar Singh Sriparna Saha Vinija Jain Samrat Mondal and Aman Chadha. [n. d.]. A Systematic Survey of Prompt Engineering in Large Language Models: Techniques and Applications. ([n. d.])."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData52589.2021.9671928"},{"key":"e_1_3_2_2_20_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 6904--6913","author":"Shao Rui","year":"2023","unstructured":"Rui Shao, Tianxing Wu, and Ziwei Liu. 2023. Detecting and grounding multimodal media manipulation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 6904--6913."},{"key":"e_1_3_2_2_21_1","volume-title":"A systematic literature review on deepfake detection techniques. Multimedia Tools and Applications","author":"Sharma Vishal Kumar","year":"2024","unstructured":"Vishal Kumar Sharma, Rakesh Garg, and Quentin Caudron. 2024. A systematic literature review on deepfake detection techniques. Multimedia Tools and Applications (2024), 1--43."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMSP59012.2023.10337658"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1098\/rsos.230964"},{"key":"e_1_3_2_2_24_1","volume-title":"Video-llama: An instructiontuned audio-visual language model for video understanding. arXiv preprint arXiv:2306.02858","author":"Zhang Hang","year":"2023","unstructured":"Hang Zhang, Xin Li, and Lidong Bing. 2023. Video-llama: An instructiontuned audio-visual language model for video understanding. arXiv preprint arXiv:2306.02858 (2023)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.amjmed.2023.02.011"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715283","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715283","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T02:01:09Z","timestamp":1759888869000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715283"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":25,"alternative-id":["10.1145\/3701716.3715283","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715283","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}