{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T03:16:42Z","timestamp":1769743002785,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":28,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556953","type":"print"},{"value":"9789819556960","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5696-0_28","type":"book-chapter","created":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T14:03:27Z","timestamp":1769695407000},"page":"398-411","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["VDIS: Combating Object Hallucination in\u00a0Multimodal Large Language Models"],"prefix":"10.1007","author":[{"given":"Fuchuan","family":"Tang","sequence":"first","affiliation":[]},{"given":"Gaocai","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,30]]},"reference":[{"key":"28_CR1","first-page":"23716","volume":"35","author":"JB Alayrac","year":"2022","unstructured":"Alayrac, J.B., et al.: Flamingo: a visual language model for few-shot learning. Adv. Neural. Inf. Process. Syst. 35, 23716\u201323736 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"28_CR2","unstructured":"Bai, J., et al.: Qwen-VL: A versatile vision-language model for understanding, localization, text reading, and beyond (2023). https:\/\/arxiv.org\/abs\/2308.12966"},{"key":"28_CR3","unstructured":"Dai, W., et al.: InstructBLIP: Towards general-purpose vision-language models with instruction tuning (2023). https:\/\/arxiv.org\/abs\/2305.06500"},{"key":"28_CR4","unstructured":"Fu, C., et al.: MME: A comprehensive evaluation benchmark for multimodal large language models (2024). https:\/\/arxiv.org\/abs\/2306.13394"},{"key":"28_CR5","unstructured":"Gunjal, A., Yin, J., Bas, E.: Detecting and preventing hallucinations in large vision language models (2024). https:\/\/arxiv.org\/abs\/2308.06394"},{"key":"28_CR6","doi-asserted-by":"crossref","unstructured":"Gupta, V., Li, Z., Kortylewski, A., Zhang, C., Li, Y., Yuille, A.: SwapMix: diagnosing and regularizing the over-reliance on visual context in visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5078\u20135088 (2022)","DOI":"10.1109\/CVPR52688.2022.00502"},{"key":"28_CR7","unstructured":"Han, Y., Nie, L., Yin, J., Wu, J., Yan, Y.: Visual perturbation-aware collaborative learning for overcoming the language prior problem (2022). https:\/\/arxiv.org\/abs\/2207.11850"},{"key":"28_CR8","unstructured":"Jiang, C., et al.: Hallucination augmented contrastive learning for multimodal large language model (2024). 
https:\/\/arxiv.org\/abs\/2312.06968"},{"key":"28_CR9","unstructured":"Leng, S., et al.: Mitigating object hallucinations in large vision-language models through visual contrastive decoding (2023). https:\/\/arxiv.org\/abs\/2311.16922"},{"key":"28_CR10","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: Chaudhuri, K., Jegelka, S., Song, L., Szepesvari, C., Niu, G., Sabato, S. (eds.) Proceedings of the 39th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0162, pp. 12888\u201312900. PMLR (2022). https:\/\/proceedings.mlr.press\/v162\/li22n.html"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Li, Y., Du, Y., Zhou, K., Wang, J., Zhao, W.X., Wen, J.R.: Evaluating object hallucination in large vision-language models (2023). https:\/\/arxiv.org\/abs\/2305.10355","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"key":"28_CR12","unstructured":"Liu, F., Lin, K., Li, L., Wang, J., Yacoob, Y., Wang, L.: Mitigating hallucination in large multi-modal models via robust instruction tuning (2024). https:\/\/arxiv.org\/abs\/2306.14565"},{"key":"28_CR13","unstructured":"Liu, H., et al.: A survey on hallucination in large vision-language models (2024). https:\/\/arxiv.org\/abs\/2402.00253"},{"key":"28_CR14","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning (2023). https:\/\/arxiv.org\/abs\/2304.08485"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Lovenia, H., Dai, W., Cahyawijaya, S., Ji, Z., Fung, P.: Negative object presence evaluation (nope) to measure object hallucination in vision-language models (2024). https:\/\/arxiv.org\/abs\/2310.05338","DOI":"10.18653\/v1\/2024.alvr-1.4"},{"key":"28_CR16","doi-asserted-by":"crossref","unstructured":"Niu, Y., Tang, K., Zhang, H., Lu, Z., Hua, X.S., Wen, J.R.: Counterfactual VQA: A cause-effect look at language bias (2021). https:\/\/arxiv.org\/abs\/2006.04315","DOI":"10.1109\/CVPR46437.2021.01251"},{"key":"28_CR17","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 8748\u20138763. PMLR (2021). https:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Sennrich, R., Vamvas, J., Mohammadshahi, A.: Mitigating hallucinations and off-target machine translation with source-contrastive and language-contrastive decoding (2024). https:\/\/arxiv.org\/abs\/2309.07098","DOI":"10.18653\/v1\/2024.eacl-short.4"},{"key":"28_CR19","unstructured":"Sun, Z., et al.: Aligning large multimodal models with factually augmented RLHF (2023). https:\/\/arxiv.org\/abs\/2309.14525"},{"key":"28_CR20","unstructured":"Wang, F., Ding, L., Rao, J., Liu, Y., Shen, L., Ding, C.: Can linguistic knowledge improve multimodal alignment in vision-language pretraining? (2023). https:\/\/arxiv.org\/abs\/2308.12898"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Wang, X., Li, X., Ding, L., Zhao, S., Biemann, C.: Using self-supervised dual constraint contrastive learning for cross-modal retrieval. In: ECAI, pp. 
2552\u20132559 (2023)","DOI":"10.3233\/FAIA230560"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Wang, X., Pan, J., Ding, L., Biemann, C.: Mitigating hallucinations in large vision-language models with instruction contrastive decoding (2024). https:\/\/arxiv.org\/abs\/2403.18715","DOI":"10.18653\/v1\/2024.findings-acl.937"},{"key":"28_CR23","unstructured":"Yin, S., et al.: Woodpecker: Hallucination correction for multimodal large language models (2023). https:\/\/arxiv.org\/abs\/2310.16045"},{"key":"28_CR24","unstructured":"You, H., et al.: Ferret: Refer and ground anything anywhere at any granularity (2023). https:\/\/arxiv.org\/abs\/2310.07704"},{"key":"28_CR25","doi-asserted-by":"crossref","unstructured":"Yu, Q., et al.: HalluciDoctor: Mitigating hallucinatory toxicity in visual instruction data (2024). https:\/\/arxiv.org\/abs\/2311.13614","DOI":"10.1109\/CVPR52733.2024.01230"},{"key":"28_CR26","unstructured":"Zhang, Y., Cui, L., Bi, W., Shi, S.: Alleviating hallucinations of large language models through induced hallucinations (2024). https:\/\/arxiv.org\/abs\/2312.15710"},{"key":"28_CR27","unstructured":"Zhou, Y., et al.: Analyzing and mitigating object hallucination in large vision-language models (2024). https:\/\/arxiv.org\/abs\/2310.00754"},{"key":"28_CR28","doi-asserted-by":"crossref","unstructured":"Zhu, X., Mao, Z., Liu, C., Zhang, P., Wang, B., Zhang, Y.: Overcoming language priors with self-supervised learning for visual question answering (2020). https:\/\/arxiv.org\/abs\/2012.11528","DOI":"10.24963\/ijcai.2020\/151"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5696-0_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T14:03:41Z","timestamp":1769695421000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5696-0_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556953","9789819556960"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5696-0_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"30 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}