{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T14:44:13Z","timestamp":1743000253528,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":30,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819794423"},{"type":"electronic","value":"9789819794430"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-9443-0_34","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T16:29:45Z","timestamp":1730392185000},"page":"389-399","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["UHDF: Hallucination Detection Using Open Source Models Beyond Close Source Models Methods"],"prefix":"10.1007","author":[{"given":"Dongxu","family":"Liu","sequence":"first","affiliation":[]},{"given":"Bufan","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Zhilong","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Bing","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Muyun","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"unstructured":"Zhao, W.X., et al.: A survey of large language models. arXiv preprint arXiv:2303.18223 (2023)","key":"34_CR1"},{"unstructured":"Chen, J., et al.: MiniGPT-v2: large language model as a unified interface for vision-language multi-task learning. arXiv preprint arXiv:2310.09478 (2023)","key":"34_CR2"},{"unstructured":"Hu, S., et al.: MiniCPM: unveiling the potential of small language models with scalable training strategies. arXiv preprint arXiv:2404.06395 (2024)","key":"34_CR3"},{"unstructured":"Liu, H., et al.: LLaVA-next: improved reasoning, OCR, and world knowledge (2024). https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/","key":"34_CR4"},{"unstructured":"Yu, T., et al.: RLAIF-V: aligning MLLMs through open-source AI feedback for super Gpt-4v trustworthiness. arXiv preprint arXiv:2405.17220 (2024)","key":"34_CR5"},{"unstructured":"Alayrac, J.B., et al.: Flamingo: a visual language model for few-shot learning. In: Advances in Neural Information Processing Systems, vol. 35, pp. 23716\u201323736 (2022)","key":"34_CR6"},{"unstructured":"Zhang, Y., et al.: Siren\u2019s song in the AI ocean: a survey on hallucination in large language models. arXiv preprint arXiv:2309.01219 (2023)","key":"34_CR7"},{"unstructured":"Tonmoy, S., et al.: A comprehensive survey of hallucination mitigation techniques in large language models. arXiv preprint arXiv:2401.01313 (2024)","key":"34_CR8"},{"doi-asserted-by":"crossref","unstructured":"Huang, Q., et al.: Opera: alleviating hallucination in multi-modal large language models via over-trust penalty and retrospection-allocation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13418\u201313427 (2024)","key":"34_CR9","DOI":"10.1109\/CVPR52733.2024.01274"},{"unstructured":"Wang, C., et al.: Survey on factuality in large language models: knowledge, retrieval and domain-specificity. arXiv preprint arXiv:2310.07521 (2023)","key":"34_CR10"},{"unstructured":"Liu, H., et al.: A survey on hallucination in large vision-language models. arXiv preprint arXiv:2402.00253 (2024)","key":"34_CR11"},{"unstructured":"Fu, C., et al.: MME: a comprehensive evaluation benchmark for multimodal large language models (2024). https:\/\/arxiv.org\/abs\/2306.13394","key":"34_CR12"},{"doi-asserted-by":"crossref","unstructured":"Li, B., et al.: Seed-bench: benchmarking multimodal large language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13299\u201313308 (2024)","key":"34_CR13","DOI":"10.1109\/CVPR52733.2024.01263"},{"doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: MMBench: is your multi-modal model an all-around player? arXiv preprint arXiv:2307.06281 (2023)","key":"34_CR14","DOI":"10.1007\/978-3-031-72658-3_13"},{"doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Unified hallucination detection for multimodal large language models. arXiv preprint arXiv:2402.03190 (2024)","key":"34_CR15","DOI":"10.18653\/v1\/2024.acl-long.178"},{"unstructured":"OpenAI, Achiam, J., Adler, S., Agarwal, S. et al: GPT-4 technical report (2024). https:\/\/arxiv.org\/abs\/2303.08774","key":"34_CR16"},{"unstructured":"Bai, J., et al.: Qwen-Vl: a frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966 (2023)","key":"34_CR17"},{"unstructured":"Dai, W., et al.: InstructBLIP: towards general-purpose vision-language models with instruction tuning (2023). https:\/\/arxiv.org\/abs\/2305.06500","key":"34_CR18"},{"unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pretraining with frozen image encoders and large language models. In: International Conference on Machine Learning, pp. 19730\u201319742. PMLR (2023)","key":"34_CR19"},{"unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. In: Advances in Neural Information Processing Systems, vol. 36 (2024)","key":"34_CR20"},{"unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)","key":"34_CR21"},{"doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","key":"34_CR22","DOI":"10.1109\/ICCV.2015.279"},{"unstructured":"Contributors, O: OpenCompass: a universal evaluation platform for foundation models. https:\/\/github.com\/open-compass\/opencompass (2023)","key":"34_CR23"},{"unstructured":"Tsimpoukelli, M., Menick, J.L., Cabi, S., Eslami, S., Vinyals, O., Hill, F.: Multi-modal few-shot learning with frozen language models. In: Advances in Neural Information Processing Systems, vol. 34, pp. 200\u2013212 (2021)","key":"34_CR24"},{"doi-asserted-by":"crossref","unstructured":"Kaul, P., et al.: Throne: an object-based hallucination benchmark for the free-form generations of large vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 27228\u201327238 (2024)","key":"34_CR25","DOI":"10.1109\/CVPR52733.2024.02571"},{"doi-asserted-by":"crossref","unstructured":"Li, Y., Du, Y., Zhou, K., Wang, J., Zhao, W.X., Wen, J.R.: Evaluating object hallucination in large vision-language models. arXiv preprint arXiv:2305.10355 (2023)","key":"34_CR26","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"doi-asserted-by":"crossref","unstructured":"Rohrbach, A., Hendricks, L.A., Burns, K., Darrell, T., Saenko, K.: Object hallucination in image captioning. arXiv preprint arXiv:1809.02156 (2018)","key":"34_CR27","DOI":"10.18653\/v1\/D18-1437"},{"unstructured":"Meta AI: Meta llama 3. https:\/\/ai.meta.com\/blog\/meta-llama-3\/ (2023). Accessed 09 Nov 2023","key":"34_CR28"},{"doi-asserted-by":"crossref","unstructured":"Liu, H., Li, C., Li, Y., Lee, Y.J.: Improved baselines with visual instruction tuning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 26296\u201326306 (2024)","key":"34_CR29","DOI":"10.1109\/CVPR52733.2024.02484"},{"unstructured":"Yang, Z., et al.: The dawn of LMMs: preliminary explorations with Gpt-4v (ision). arXiv preprint arXiv:2309.17421, 9(1), 1 (2023)","key":"34_CR30"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-9443-0_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T16:36:20Z","timestamp":1730392580000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-9443-0_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9789819794423","9789819794430"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-9443-0_34","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2024\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}