{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T11:05:17Z","timestamp":1781953517042,"version":"3.54.5"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62221005"],"award-info":[{"award-number":["62221005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62576063"],"award-info":[{"award-number":["62576063"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62306056"],"award-info":[{"award-number":["62306056"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62136002"],"award-info":[{"award-number":["62136002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of Chongqing","doi-asserted-by":"publisher","award":["CSTB2023NSCQ-LZX0006"],"award-info":[{"award-number":["CSTB2023NSCQ-LZX0006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1109\/tcsvt.2025.3613993","type":"journal-article","created":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T17:35:21Z","timestamp":1758735321000},"page":"2842-2853","source":"Crossref","is-referenced-by-count":2,"title":["Visual Evidence-Aware for Object Hallucinations Rectification in LLM-Based Video Captioning"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1748-6890","authenticated-orcid":false,"given":"Ye","family":"Wang","sequence":"first","affiliation":[{"name":"Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5054-469X","authenticated-orcid":false,"given":"Jiancheng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6329-3096","authenticated-orcid":false,"given":"Qun","family":"Liu","sequence":"additional","affiliation":[{"name":"Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Feng","family":"Hu","sequence":"additional","affiliation":[{"name":"Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8521-5232","authenticated-orcid":false,"given":"Guoyin","family":"Wang","sequence":"additional","affiliation":[{"name":"Chongqing Normal University, Chongqing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/877"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3169894"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3165934"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01032"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01816"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01742"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCSP.2018.8524251"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00676"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwae403"},{"key":"ref10","first-page":"1","article-title":"Mitigating hallucination in large multi-modal models via robust instruction tuning","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Liu"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29771"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-26316-3_37"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00253"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1437"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4251-x"},{"key":"ref16","first-page":"1","article-title":"Analyzing and mitigating object hallucination in large vision-language models","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Zhou"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"key":"ref18","article-title":"Evaluation and analysis of hallucination in large vision-language models","author":"Wang","year":"2023","journal-title":"arXiv:2308.15126"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2018.00636"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01274"},{"key":"ref21","first-page":"38728","article-title":"mPLUG-2: A modularized multi-modal foundation model across text, image and video","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3058626"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3502736"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3502621"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3399933"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2142"},{"key":"ref27","article-title":"mPLUG-Owl2: Revolutionizing multi-modal large language model with modality collaboration","author":"Ye","year":"2023","journal-title":"arXiv:2311.04257"},{"key":"ref28","article-title":"Aligning large multi-modal model with robust instruction tuning","author":"Liu","year":"2023","journal-title":"arXiv:2306.14565"},{"key":"ref29","article-title":"Retrieval-augmented generation for large language models: A survey","author":"Gao","year":"2023","journal-title":"arXiv:2312.10997"},{"key":"ref30","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive NLP tasks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Lewis"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3532682"},{"key":"ref32","first-page":"3929","article-title":"Retrieval augmented language model pre-training","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Guu"},{"key":"ref33","article-title":"A comprehensive survey of hallucination mitigation techniques in large language models","author":"Tonmoy","year":"2024","journal-title":"arXiv:2401.01313"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00278"},{"key":"ref35","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"139","author":"Radford"},{"key":"ref36","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Ren"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00633"},{"issue":"8","key":"ref38","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"ref40","first-page":"190","article-title":"Collecting highly parallel data for paraphrase evaluation","volume-title":"Proc. 49th Annu. Meeting Assoc. Comput. Linguistics, Human Lang. Technol.","author":"Chen"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1173"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16353"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00157"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16788-1_25"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3322329"},{"key":"ref51","first-page":"19730","article-title":"BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"Proc. ICML","author":"Li"},{"key":"ref52","first-page":"1","article-title":"Unified language-vision pretraining in llm with dynamic discrete visual tokenization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Jin"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01741"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01426"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-demo.49"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01265"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/76\/11424237\/11177574.pdf?arnumber=11177574","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T20:00:46Z","timestamp":1773086446000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11177574\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":56,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2025.3613993","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"value":"1051-8215","type":"print"},{"value":"1558-2205","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3]]}}}