{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T23:25:31Z","timestamp":1781047531199,"version":"3.54.1"},"reference-count":65,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006227","name":"Lawrence Livermore National Laboratory","doi-asserted-by":"publisher","award":["DE-AC52-07NA27344"],"award-info":[{"award-number":["DE-AC52-07NA27344"]}],"id":[{"id":"10.13039\/100006227","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.00692","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"7372-7382","source":"Crossref","is-referenced-by-count":1,"title":["TruthPrInt: Mitigating Large Vision-Language Models Object Hallucination via Latent Truthful-Guided Pre-Intervention"],"prefix":"10.1109","author":[{"given":"Jinhao","family":"Duan","sequence":"first","affiliation":[{"name":"Drexel University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fei","family":"Kong","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of 
China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hao","family":"Cheng","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology (Guangzhou)"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"James","family":"Diffenderfer","sequence":"additional","affiliation":[{"name":"LLNL"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bhavya","family":"Kailkhura","sequence":"additional","affiliation":[{"name":"LLNL"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lichao","family":"Sun","sequence":"additional","affiliation":[{"name":"Lehigh University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaofeng","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaoshuang","family":"Shi","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kaidi","family":"Xu","sequence":"additional","affiliation":[{"name":"Drexel University"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.68"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr46437.2021.00356"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3902"},{"key":"ref4","article-title":"Inside: Llms\u2019 internal states retain the power of hallucination detection","author":"Chen","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref5","article-title":"Beyond surface: Probing llama across scales and layers","author":"Chen","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref6","article-title":"Multi-object hallucination in vision-language 
models","author":"Chen","year":"2024","journal-title":"CoRR"},{"key":"ref7","article-title":"Halc: Object hallucination reduction via adaptive focal-contrast decoding","volume-title":"Forty-first International Conference on Machine Learning. 2, 6","author":"Chen"},{"key":"ref8","article-title":"Expanding performance boundaries of open-source multimodal models with model, data, and test-time scaling","author":"Chen","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref9","article-title":"Dola: Decoding by contrasting layers improves factuality in large language models","volume-title":"The Twelfth International Conference on Learning Representations. 2","author":"Chuang"},{"key":"ref10","article-title":"Seeing is believing: Mitigating hallucination in large vision-language models via clip-guided decoding","volume-title":"ICLR 2024 Workshop on Reliable and Responsible Foundation Models","author":"Deng"},{"key":"ref11","doi-asserted-by":"crossref","volume-title":"Reducing hallucinations in large language models through contextual position encoding","author":"Desrochers","DOI":"10.31219\/osf.io\/exjqb"},{"key":"ref12","article-title":"Haloscope: Harnessing unlabeled llm generations for hallucination detection","author":"Du","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.276"},{"key":"ref14","article-title":"Do more details always introduce more hallucinations in lvlm-based image captioning?","author":"Feng","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref15","article-title":"Characterizing context influence and hallucination in summarization","author":"Flemings","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref16","article-title":"Mme: A comprehensive evaluation benchmark for multimodal large language models","volume":"abs\/2306.13394","author":"Fu","year":"2023","journal-title":"ArXiv"},{"key":"ref17","article-title":"Video-mme: The 
first-ever comprehensive evaluation benchmark of multi-modal llms in video analysis","author":"Fu","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.439"},{"key":"ref19","article-title":"Decomposing uncertainty for large language models through input clarification ensembling","author":"Hou","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref20","article-title":"A survey of uncertainty estimation in llms: Theory meets practice","author":"Huang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01274"},{"key":"ref22","article-title":"Enabling ensemble learning for heterogeneous large language models with deep parallel collaboration","author":"Huang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.52202\/079017-4246"},{"key":"ref24","article-title":"Semantic uncertainty: Linguistic invariances for uncertainty estimation in natural language generation","volume-title":"The Eleventh International Conference on Learning Representations. 1","author":"Kuhn"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01316"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1797"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.262"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"key":"ref29","article-title":"Unveiling the pitfalls of knowledge editing for large language models","volume-title":"The Twelfth International Conference on Learning Representations","author":"Li"},{"key":"ref30","article-title":"Generating with confidence: Uncertainty quantification for black-box large language models","author":"Lin","journal-title":"Transactions on Machine Learning Research. 
1"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"ref32","article-title":"Visual instruction tuning","author":"Liu","year":"2024","journal-title":"Advances in neural information processing systems, 36"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73010-8_8"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1262"},{"key":"ref35","article-title":"Mass editing memory in a transformer","author":"Meng","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref36","article-title":"Relative representations enable zero-shot latent space communication","volume-title":"The Eleventh International Conference on Learning Representations","author":"Moschella"},{"key":"ref37","article-title":"Contrastive decoding improves reasoning in large language models","author":"Lewis","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref38","article-title":"Im2text: Describing images using 1 million captioned photographs","author":"Ordonez","year":"2011","journal-title":"Advances in neural information processing systems, 24"},{"key":"ref39","article-title":"Clip-dpo: Vision-language models as a source of preference for fixing hallucinations in lvlms","author":"Ouali","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref40","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics","author":"Papineni","year":"2002"},{"key":"ref41","article-title":"Llmmaps-a visual metaphor for stratified evaluation of large language models","author":"Puchert","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref42","article-title":"A survey of hallucination in large foundation models","author":"Rawte","year":"2023","journal-title":"arXiv preprint 
arXiv"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1437"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063"},{"key":"ref46","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref47","article-title":"Behind the magic, merlim: Multimodal evaluation benchmark for large image-language models","volume-title":"arXiv preprint arXiv","author":"Villa","year":"2023"},{"key":"ref48","article-title":"Contrastive region guidance: Improving grounding in vision-language models without training","author":"Wan","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-53302-0_3"},{"key":"ref50","article-title":"Qwen2-vl: Enhancing vision-language model\u2019s perception of the world at any resolution","author":"Wang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.937"},{"key":"ref52","article-title":"Unified triplet-level hallucination evaluation for large vision-language models","author":"Wu","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.414"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.493"},{"key":"ref55","article-title":"Order matters in hallucination: Reasoning order as benchmark and reflexive prompting for large-language-models","author":"Xie","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref56","article-title":"Mitigating object hallucination via concentric causal attention","author":"Xing","year":"2024","journal-title":"arXiv preprint 
arXiv"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.01364"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01239"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4251-x"},{"key":"ref60","article-title":"Eventhallusion: Diagnosing event hallucinations in video llms","author":"Zhang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/3639372"},{"key":"ref62","article-title":"Analyzing and mitigating object hallucination in large vision-language models","volume-title":"The Twelfth International Conference on Learning Representations. 2","author":"Zhou"},{"key":"ref63","article-title":"Analyzing and mitigating object hallucination in large vision-language models","author":"Zhou","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref64","article-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models","volume-title":"The Twelfth International Conference on Learning Representations. 
1, 2, 3","author":"Zhu"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-naacl.294"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11446120.pdf?arnumber=11446120","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:03:51Z","timestamp":1777611831000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11446120\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":65,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.00692","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}