{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,15]],"date-time":"2025-06-15T04:05:12Z","timestamp":1749960312765,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819682973","type":"print"},{"value":"9789819682980","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-8298-0_30","type":"book-chapter","created":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T18:21:45Z","timestamp":1749925305000},"page":"376-388","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Meta-thinking Approach to\u00a0Mitigating Linguistic Sycophancy in\u00a0Vision-Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9186-4838","authenticated-orcid":false,"given":"Chinh","family":"Hoang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nathan","family":"Roberts","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9818-9600","authenticated-orcid":false,"given":"Mohammad Rashedul","family":"Hasan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,15]]},"reference":[{"key":"30_CR1","unstructured":"Acharya, M., et\u00a0al.: TallyQA: answering complex counting questions (2018). https:\/\/arxiv.org\/abs\/1810.12440"},{"key":"30_CR2","unstructured":"Anthropic et\u00a0al.: The Claude 3 model family: Opus, Sonnet, Haiku. Technical report, Anthropic (2024). https:\/\/www-cdn.anthropic.com\/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627\/Model_Card_Claude_3.pdf"},{"key":"30_CR3","unstructured":"Anthropic et\u00a0al.: Claude 3.5 sonnet. https:\/\/www.anthropic.com\/news\/claude-3-5-sonnet (2024). https:\/\/www.anthropic.com\/news\/claude-3-5-sonnet. Accessed 02 Sep 2024"},{"key":"30_CR4","unstructured":"Ashutosh, C., et\u00a0al.: Gender classification dataset. https:\/\/www.kaggle.com\/datasets\/cashutosh\/gender-classification-dataset (2017). https:\/\/www.kaggle.com\/datasets\/cashutosh\/gender-classification-dataset. Accessed 01 Sep 2024"},{"key":"30_CR5","unstructured":"Bai, J., et\u00a0al.: Qwen-VL: a versatile vision-language model for understanding, localization, text reading, and beyond (2023). https:\/\/arxiv.org\/abs\/2308.12966"},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Z., et\u00a0al.: How far are we to GPT-4V? Closing the gap to commercial multimodal models with open-source suites. arXiv preprint arXiv:2404.16821 (2024)","DOI":"10.1007\/s11432-024-4231-5"},{"key":"30_CR7","unstructured":"Dai, W., et\u00a0al.: InstructBLIP: towards general-purpose vision-language models with instruction tuning (2023). https:\/\/arxiv.org\/abs\/2305.06500"},{"key":"30_CR8","unstructured":"Datasets, T., et\u00a0al.: Tensorflow flowers classification dataset. https:\/\/www.tensorflow.org\/datasets\/catalog\/tf_flowers (2024). https:\/\/www.tensorflow.org\/datasets\/catalog\/tf_flowers. Accessed 01 Sep 2024"},{"key":"30_CR9","unstructured":"Denison, C., et\u00a0al.: Sycophancy to subterfuge: investigating reward-tampering in large language models (2024). https:\/\/arxiv.org\/abs\/2406.10162"},{"key":"30_CR10","unstructured":"Gallegos, I.O., et\u00a0al.: Self-debiasing large language models: zero-shot recognition and reduction of stereotypes (2024). https:\/\/arxiv.org\/abs\/2402.01981"},{"key":"30_CR11","unstructured":"Hong, W., et\u00a0al.: CogVLM2: visual language models for image and video understanding (2024). https:\/\/arxiv.org\/abs\/2408.16500"},{"key":"30_CR12","doi-asserted-by":"publisher","unstructured":"Hou, S., et\u00a0al.: VegFru: a domain-specific dataset for fine-grained visual categorization. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 541\u2013549 (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.66","DOI":"10.1109\/ICCV.2017.66"},{"key":"30_CR13","unstructured":"Huang, Y., et\u00a0al.: TrustLLM: trustworthiness in large language models (2024). https:\/\/arxiv.org\/abs\/2401.05561"},{"key":"30_CR14","unstructured":"Kahneman, D., et\u00a0al.: Thinking, fast and slow. Farrar, Straus and Giroux, New York (2011). https:\/\/www.amazon.de\/Thinking-Fast-Slow-Daniel-Kahneman\/dp\/0374275637\/ref=wl_it_dp_o_pdT1_nS_nC?ie=UTF8&colid=151193SNGKJT9 &coliid=I3OCESLZCVDFL7"},{"key":"30_CR15","doi-asserted-by":"publisher","unstructured":"Li, J., et\u00a0al.: BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models, June 2023. https:\/\/doi.org\/10.48550\/arXiv.2301.12597","DOI":"10.48550\/arXiv.2301.12597"},{"key":"30_CR16","doi-asserted-by":"publisher","unstructured":"Li, Y., et\u00a0al.: Evaluating object hallucination in large vision-language models. In: Bouamor, H., et\u00a0al. (eds.) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 292\u2013305. Association for Computational Linguistics, Singapore, December 2023. https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.20","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"key":"30_CR17","doi-asserted-by":"publisher","unstructured":"Liang, P.P., et\u00a0al.: Towards debiasing sentence representations. In: Jurafsky, D., et\u00a0al. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 5502\u20135515. Association for Computational Linguistics, Online, July 2020. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.488","DOI":"10.18653\/v1\/2020.acl-main.488"},{"key":"30_CR18","unstructured":"Lim, J., et\u00a0al.: Measuring visual sycophancy in multimodal models (2024). https:\/\/arxiv.org\/abs\/2408.09111"},{"key":"30_CR19","unstructured":"Liu, F., et\u00a0al.: Mitigating hallucination in large multi-modal models via robust instruction tuning (2024). https:\/\/arxiv.org\/abs\/2306.14565"},{"key":"30_CR20","unstructured":"Liu, H., et\u00a0al.: Improved baselines with visual instruction tuning (2024). https:\/\/arxiv.org\/abs\/2310.03744"},{"key":"30_CR21","unstructured":"Liu, H., et\u00a0al.: LlaVA-next: improved reasoning, OCR, and world knowledge, January 2024. https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/"},{"key":"30_CR22","unstructured":"Liu, H., et\u00a0al.: Visual instruction tuning (2023). https:\/\/arxiv.org\/abs\/2304.08485"},{"key":"30_CR23","unstructured":"Liu, Y., et\u00a0al.: Trustworthy LLMS: a survey and guideline for evaluating large language models\u2019 alignment (2024). https:\/\/arxiv.org\/abs\/2308.05374"},{"key":"30_CR24","unstructured":"OpenAI et\u00a0al.: Hello gpt-4o. https:\/\/openai.com\/index\/hello-gpt-4o\/ (2024). https:\/\/openai.com\/index\/hello-gpt-4o\/. Accessed 02 Sep 2024"},{"key":"30_CR25","doi-asserted-by":"publisher","unstructured":"Perez, E., et\u00a0al.: Discovering language model behaviors with model-written evaluations. In: Rogers, A., et\u00a0al. (eds.) Findings of the Association for Computational Linguistics: ACL 2023, pp. 13387\u201313434. Association for Computational Linguistics, Toronto, Canada, July 2023. https:\/\/doi.org\/10.18653\/v1\/2023.findings-acl.847, https:\/\/aclanthology.org\/2023.findings-acl.847","DOI":"10.18653\/v1\/2023.findings-acl.847"},{"key":"30_CR26","unstructured":"Raja, A.M., et\u00a0al.: Violence vs. non-violence (2023). https:\/\/www.kaggle.com\/datasets\/abdulmananraja\/real-life-violence-situations. Accessed 04 Sep 2024"},{"key":"30_CR27","doi-asserted-by":"publisher","unstructured":"Ravfogel, S., et\u00a0al.: Null it out: guarding protected attributes by iterative nullspace projection. In: Jurafsky, D., et\u00a0al. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7237\u20137256. Association for Computational Linguistics, Online, July 2020. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.647","DOI":"10.18653\/v1\/2020.acl-main.647"},{"key":"30_CR28","unstructured":"Rimsky, N. et\u00a0al.: Blog post on the AI alignment forum, July 2023. https:\/\/www.alignmentforum.org\/posts\/zt6hRsDE84HeBKh7E\/. Accessed 8 Sep 2024"},{"key":"30_CR29","doi-asserted-by":"crossref","unstructured":"Schick, T., et\u00a0al.: Self-diagnosis and self-debiasing: a proposal for reducing corpus-based bias in NLP (2021). https:\/\/arxiv.org\/abs\/2103.00453","DOI":"10.1162\/tacl_a_00434"},{"key":"30_CR30","unstructured":"Sharma, M., et\u00a0al.: Towards understanding sycophancy in language models (2023). https:\/\/arxiv.org\/abs\/2310.13548"},{"key":"30_CR31","doi-asserted-by":"crossref","unstructured":"Shtedritski, A., et\u00a0al.: What does clip know about a red circle? visual prompt engineering for VLMS (2023). https:\/\/arxiv.org\/abs\/2304.06712","DOI":"10.1109\/ICCV51070.2023.01101"},{"key":"30_CR32","unstructured":"Si, C., et\u00a0al.: Prompting GPT-3 to be reliable (2023). https:\/\/arxiv.org\/abs\/2210.09150"},{"key":"30_CR33","doi-asserted-by":"publisher","unstructured":"Soliman, M.M., et\u00a0al.: Violence recognition from videos using deep learning techniques. In: 2019 Ninth International Conference on Intelligent Computing and Information Systems (ICICIS), pp. 80\u201385 (2019). https:\/\/doi.org\/10.1109\/ICICIS46948.2019.9014714","DOI":"10.1109\/ICICIS46948.2019.9014714"},{"key":"30_CR34","unstructured":"Sun, Z., et\u00a0al.: Aligning large multimodal models with factually augmented RLHF (2023). https:\/\/arxiv.org\/abs\/2309.14525"},{"key":"30_CR35","unstructured":"Team, G., et\u00a0al.: Gemini 1.5: unlocking multimodal understanding across millions of tokens of context (2024). https:\/\/arxiv.org\/abs\/2403.05530"},{"key":"30_CR36","unstructured":"Turpin, M., et\u00a0al.: Language models don\u2019t always say what they think: Unfaithful explanations in chain-of-thought prompting (2023). https:\/\/arxiv.org\/abs\/2305.04388"},{"key":"30_CR37","unstructured":"Webster, K. et\u00a0al.: Measuring and reducing gendered correlations in pre-trained models (2021). https:\/\/arxiv.org\/abs\/2010.06032"},{"key":"30_CR38","unstructured":"Wei, J. et\u00a0al.: Simple synthetic data reduces sycophancy in large language models (2024). https:\/\/arxiv.org\/abs\/2308.03958"},{"key":"30_CR39","doi-asserted-by":"crossref","unstructured":"Ye, Q., et\u00a0al.: mplug-owl2: Revolutionizing multi-modal large language model with modality collaboration (2023). https:\/\/arxiv.org\/abs\/2311.04257","DOI":"10.1109\/CVPR52733.2024.01239"},{"key":"30_CR40","unstructured":"Zhang, Y.F., et\u00a0al.: Debiasing multimodal large language models (2024). https:\/\/arxiv.org\/abs\/2403.05262"},{"key":"30_CR41","unstructured":"Zhang, Y., et\u00a0al.: Benchmarking trustworthiness of multimodal large language models: a comprehensive study (2024). https:\/\/arxiv.org\/abs\/2406.07057"},{"key":"30_CR42","unstructured":"Zhu, D., et\u00a0al.: MiniGPT-4: Enhancing vision-language understanding with advanced large language models (2023). https:\/\/arxiv.org\/abs\/2304.10592"},{"key":"30_CR43","doi-asserted-by":"publisher","unstructured":"Zmigrod, R., et\u00a0al.: Counterfactual data augmentation for mitigating gender stereotypes in languages with rich morphology. In: Korhonen, A. et\u00a0al. (eds.) Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 1651\u20131661. Association for Computational Linguistics, Florence, Italy, July 2019. https:\/\/doi.org\/10.18653\/v1\/P19-1161, https:\/\/aclanthology.org\/P19-1161","DOI":"10.18653\/v1\/P19-1161"}],"container-title":["Lecture Notes in Computer Science","Data Science: Foundations and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-8298-0_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T18:21:54Z","timestamp":1749925314000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-8298-0_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819682973","9789819682980"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-8298-0_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sydney, NSW","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}