{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T10:01:29Z","timestamp":1771495289529,"version":"3.50.1"},"reference-count":38,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1016\/j.engappai.2026.114153","type":"journal-article","created":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T03:39:56Z","timestamp":1770781196000},"page":"114153","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Multi-granularity alignment and cross-modal reasoning for fake news video explanation"],"prefix":"10.1016","volume":"169","author":[{"given":"Chao","family":"Cheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weiwei","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.114153_bib1","series-title":"Proceedings of the Acl Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization","first-page":"65","article-title":"METEOR: an automatic metric for MT evaluation with improved correlation with human judgments[C]","author":"Banerjee","year":"2005"},{"key":"10.1016\/j.engappai.2026.114153_bib2","series-title":"Proceedings of the 32nd ACM International Conference on Multimedia","first-page":"1351","article-title":"Fakingrecipe: detecting fake news on short video platforms from the perspective of creative process","author":"Bu","year":"2024"},{"key":"10.1016\/j.engappai.2026.114153_bib3","article-title":"Multimodal fake news video explanation: Dataset, analysis and evaluation","author":"Chen","year":"2025","journal-title":"arXiv preprint arXiv:2501.08514"},{"key":"10.1016\/j.engappai.2026.114153_bib4","article-title":"Rouge: a package for automatic evaluation of summaries[C]","volume":"2004","author":"Chin-Yew","year":"2004"},{"key":"10.1016\/j.engappai.2026.114153_bib5","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"issue":"10","key":"10.1016\/j.engappai.2026.114153_bib6","doi-asserted-by":"crossref","first-page":"2222","DOI":"10.1109\/TNNLS.2016.2582924","article-title":"LSTM: a search space odyssey","volume":"28","author":"Greff","year":"2016","journal-title":"IEEE Transact. Neural Networks Learn. Syst."},{"issue":"12","key":"10.1016\/j.engappai.2026.114153_bib7","doi-asserted-by":"crossref","first-page":"4453","DOI":"10.1109\/TCSVT.2019.2957309","article-title":"Video dialog via multi-grained convolutional self-attention context multi-modal networks","volume":"30","author":"Gu","year":"2019","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"10.1016\/j.engappai.2026.114153_bib8","series-title":"Proceedings of the 32nd ACM International Conference on Information and Knowledge Management","first-page":"659","article-title":"Interpretable fake news detection with graph evidence[C]","author":"Guo","year":"2023"},{"key":"10.1016\/j.engappai.2026.114153_bib9","doi-asserted-by":"crossref","DOI":"10.1109\/TCSS.2024.3373661","article-title":"Cross-modal attention network for detecting multimodal misinformation from multiple platforms","author":"Guo","year":"2024","journal-title":"IEEE Transactions on Computational Social Systems"},{"issue":"1","key":"10.1016\/j.engappai.2026.114153_bib10","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","article-title":"A survey on vision transformer","volume":"45","author":"Han","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.engappai.2026.114153_bib11","article-title":"Gpt-4o system card","author":"Hurst","year":"2024","journal-title":"arXiv preprint arXiv:2410.21276"},{"key":"10.1016\/j.engappai.2026.114153_bib12","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11039","article-title":"Haav: hierarchical aggregation of augmented views for image captioning[C]","author":"Kuo","year":"2023"},{"key":"10.1016\/j.engappai.2026.114153_bib13","series-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics","article-title":"BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension","author":"Lewis","year":"2020"},{"key":"10.1016\/j.engappai.2026.114153_bib14","article-title":"A survey of multimodal fake news detection: a cross-modal interaction perspective","author":"Li","year":"2025","journal-title":"IEEE Trans. Emerg. Top. Comput. Intell."},{"key":"10.1016\/j.engappai.2026.114153_bib15","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"18041","article-title":"Show, deconfound and tell: image captioning with causal inference[C]","author":"Liu","year":"2022"},{"key":"10.1016\/j.engappai.2026.114153_bib16","first-page":"34892","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114153_bib17","first-page":"5","article-title":"Fixing weight decay regularization in adam[J]","author":"Loshchilov","year":"2017","journal-title":"arXiv preprint arXiv:1711.05101"},{"issue":"2","key":"10.1016\/j.engappai.2026.114153_bib18","doi-asserted-by":"crossref","first-page":"51","DOI":"10.3390\/bs12020051","article-title":"Trust, media credibility, social ties, and the intention to share towards information verification in an age of fake news","volume":"12","author":"Majerczak","year":"2022","journal-title":"Behav. Sci."},{"key":"10.1016\/j.engappai.2026.114153_bib19","series-title":"International Conference on Multimedia Modeling","first-page":"331","article-title":"Multi-modal semantic inconsistency detection in social media news posts","author":"McCrae","year":"2022"},{"issue":"3","key":"10.1016\/j.engappai.2026.114153_bib20","doi-asserted-by":"crossref","DOI":"10.30935\/ojcmt\/12083","article-title":"Developing fake news immunity: fallacies as misinformation triggers during the pandemic","volume":"12","author":"Musi","year":"2022","journal-title":"Online J. Commun. Media Technol."},{"key":"10.1016\/j.engappai.2026.114153_bib21","series-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation[C]","author":"Papineni","year":"2002"},{"issue":"12","key":"10.1016\/j.engappai.2026.114153_bib22","first-page":"14444","article-title":"Fakesv: a multimodal benchmark with rich social context for fake news detection on short video platforms[C]","volume":"37","author":"Qi","year":"2023","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.114153_bib23","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13052","article-title":"SNIFFER: multimodal large language model for explainable out-of-context misinformation detection[C]","author":"Qi","year":"2024"},{"issue":"5","key":"10.1016\/j.engappai.2026.114153_bib24","doi-asserted-by":"crossref","DOI":"10.1016\/j.ipm.2025.104120","article-title":"Improving multimodal fake news detection by leveraging cross-modal content correlation","volume":"62","author":"Qiao","year":"2025","journal-title":"Inf. Process. Manag."},{"key":"10.1016\/j.engappai.2026.114153_bib25","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision[C]","author":"Radford","year":"2021"},{"key":"10.1016\/j.engappai.2026.114153_bib26","series-title":"Sentence-BERT: Sentence Embeddings Using Siamese BERT-networks","author":"Reimers","year":"2019"},{"key":"10.1016\/j.engappai.2026.114153_bib27","series-title":"Proceedings of the 2023 ACM International Conference on Multimedia Retrieval","first-page":"316","article-title":"Graph interactive network with adaptive gradient for multi-modal rumor detection[C]","author":"Sun","year":"2023"},{"issue":"6","key":"10.1016\/j.engappai.2026.114153_bib28","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1093\/jcmc\/zmab010","article-title":"Seeing is believing: is video modality more powerful in spreading fake news via online messaging apps?","volume":"26","author":"Sundar","year":"2021","journal-title":"J. Computer-Mediated Commun."},{"key":"10.1016\/j.engappai.2026.114153_bib29","series-title":"Proceedings of the 31st ACM International Conference on Multimedia","first-page":"5696","article-title":"Cross-modal contrastive learning for multimodal fake news detection","author":"Wang","year":"2023"},{"key":"10.1016\/j.engappai.2026.114153_bib30","article-title":"Qwen2-vl: enhancing vision-language model's perception of the world at any resolution","author":"Wang","year":"2024","journal-title":"arXiv preprint arXiv:2409.12191"},{"key":"10.1016\/j.engappai.2026.114153_bib31","series-title":"Proceedings of the ACM Web Conference 2024","first-page":"2452","article-title":"Explainable fake news detection with large language model via defense among competing wisdom[C]","author":"Wang","year":"2024"},{"key":"10.1016\/j.engappai.2026.114153_bib32","series-title":"FMNV: a Dataset of Media-Published News Videos for Fake News Detection[C]\/\/International Conference on Intelligent Computing","first-page":"321","author":"Wang","year":"2025"},{"issue":"18","key":"10.1016\/j.engappai.2026.114153_bib33","doi-asserted-by":"crossref","first-page":"20684","DOI":"10.1007\/s11227-023-05465-z","article-title":"Multi-head attention-based model for reconstructing continuous missing time series data","volume":"79","author":"Wu","year":"2023","journal-title":"J. Supercomput."},{"key":"10.1016\/j.engappai.2026.114153_bib34","series-title":"Aggregated Residual Transformations for Deep Neural networks[C]\/\/Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1492","author":"Xie","year":"2017"},{"key":"10.1016\/j.engappai.2026.114153_bib35","series-title":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","first-page":"2733","article-title":"End-to-end multimodal fact-checking and explanation generation: a challenging dataset and models[C]","author":"Yao","year":"2023"},{"issue":"5","key":"10.1016\/j.engappai.2026.114153_bib36","doi-asserted-by":"crossref","first-page":"6259","DOI":"10.1007\/s11042-021-11733-y","article-title":"Deepfake generation and detection, a survey","volume":"81","author":"Zhang","year":"2022","journal-title":"Multimed. Tool. Appl."},{"key":"10.1016\/j.engappai.2026.114153_bib37","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.123568","article-title":"Attention-guided multi-granularity fusion model for video summarization","volume":"249","author":"Zhang","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.engappai.2026.114153_bib38","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.123568","article-title":"Attention-guided multi-granularity fusion model for video summarization","volume":"249","author":"Zhang","year":"2024","journal-title":"Expert Syst. Appl."}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626004343?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626004343?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T08:44:08Z","timestamp":1771490648000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626004343"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":38,"alternative-id":["S0952197626004343"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114153","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Multi-granularity alignment and cross-modal reasoning for fake news video explanation","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114153","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114153"}}