{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:16:58Z","timestamp":1765502218274,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3760884","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T01:03:42Z","timestamp":1762563822000},"page":"4722-4727","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Think it Image by Image: Multi-Image Moral Reasoning of Large Vision-Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-3067-6110","authenticated-orcid":false,"given":"Chujie","family":"Gao","sequence":"first","affiliation":[{"name":"University of Notre Dame, South Bend, Indiana, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8315-7972","authenticated-orcid":false,"given":"Yue","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Notre Dame, South Bend, Indiana, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5065-9051","authenticated-orcid":false,"given":"Xiangqi","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Notre Dame, South Bend, Indiana, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0960-1699","authenticated-orcid":false,"given":"Siyuan","family":"Wu","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3932-5956","authenticated-orcid":false,"given":"Nitesh V.","family":"Chawla","sequence":"additional","affiliation":[{"name":"University of Notre Dame, South Bend, Indiana, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3574-5665","authenticated-orcid":false,"given":"Xiangliang","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Notre Dame, South Bend, Indiana, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Anthropic. 2024. Claude 3.5: A Sonnet. https:\/\/www.anthropic.com\/news\/claude-3-5-sonnet. Accessed: 2024-09-04."},{"key":"e_1_3_2_1_2_1","unstructured":"Dongping Chen Yue Huang Siyuan Wu Jingyu Tang Liuyi Chen Yilin Bai Zhigang He Chenlong Wang Huichi Zhou Yiqiang Li et al. 2024a. GUI-WORLD: A Dataset for GUI-oriented Multimodal LLM-based Agents. arXiv preprint arXiv:2406.10819 (2024)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Lin Chen Jinsong Li Xiaoyi Dong Pan Zhang Yuhang Zang Zehui Chen Haodong Duan Jiaqi Wang Yu Qiao Dahua Lin et al. 2024b. Are We on the Right Way for Evaluating Large Vision-Language Models? arXiv preprint arXiv:2403.20330 (2024).","DOI":"10.52202\/079017-0850"},{"key":"e_1_3_2_1_4_1","unstructured":"Sunipa Dev Emily Sheng Jieyu Zhao Aubrie Amstutz Jiao Sun Yu Hou Mattie Sanseverino Jiin Kim Akihiro Nishi Nanyun Peng et al. 2021. On measures of biases and harms in NLP. arXiv preprint arXiv:2108.03362 (2021)."},{"key":"e_1_3_2_1_5_1","volume-title":"Cross-modality Image Interpretation via Concept Decomposition Vector of Visual-language Models","author":"Fang Zhengqing","year":"2024","unstructured":"Zhengqing Fang, Zhouhang Yuan, Ziyu Li, Jingyuan Chen, Kun Kuang, Yu-feng Yao, and Fei Wu. 2024. Cross-modality Image Interpretation via Concept Decomposition Vector of Visual-language Models. IEEE Transactions on Circuits and Systems for Video Technology (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"Social chemistry 101: Learning to reason about social and moral norms. arXiv preprint arXiv:2011.00620","author":"Forbes Maxwell","year":"2020","unstructured":"Maxwell Forbes, Jena D Hwang, Vered Shwartz, Maarten Sap, and Yejin Choi. 2020. Social chemistry 101: Learning to reason about social and moral norms. arXiv preprint arXiv:2011.00620 (2020)."},{"key":"e_1_3_2_1_7_1","volume-title":"Chatglm: A family of large language models from glm-130b to glm-4 all tools. arXiv preprint arXiv:2406.12793","author":"Aohan Zeng Team GLM","year":"2024","unstructured":"Team GLM, Aohan Zeng, Bin Xu, Bowen Wang, Chenhui Zhang, Da Yin, Dan Zhang, Diego Rojas, Guanyu Feng, Hanlin Zhao, et al., 2024. Chatglm: A family of large language models from glm-130b to glm-4 all tools. arXiv preprint arXiv:2406.12793 (2024)."},{"key":"e_1_3_2_1_8_1","unstructured":"Google. 2024. Gemini Pro 1.5 - Google DeepMind Official Website. https:\/\/blog.google\/products\/ai\/gemini-1-5-pro\/ Accessed: 2024-08-26."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01138"},{"key":"e_1_3_2_1_10_1","volume-title":"VIVA: A Benchmark for Vision-Grounded Decision-Making with Human Values. arXiv preprint arXiv:2407.03000","author":"Hu Zhe","year":"2024","unstructured":"Zhe Hu, Yixiao Ren, Jing Li, and Yu Yin. 2024. VIVA: A Benchmark for Vision-Grounded Decision-Making with Human Values. arXiv preprint arXiv:2407.03000 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"20270","author":"Huang Yue","year":"2024","unstructured":"Yue Huang, Lichao Sun, Haoran Wang, Siyuan Wu, Qihui Zhang, Yuan Li, Chujie Gao, et al., 2024. Position: TrustLLM: Trustworthiness in Large Language Models. In Proceedings of the 41st International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 235), Ruslan Salakhutdinov, Zico Kolter, Katherine Heller, Adrian Weller, Nuria Oliver, Jonathan Scarlett, and Felix Berkenkamp (Eds.). PMLR, 20166-20270. https:\/\/proceedings.mlr.press\/v235\/huang24x.html"},{"key":"e_1_3_2_1_12_1","volume-title":"Trustgpt: A benchmark for trustworthy and responsible large language models. arXiv preprint arXiv:2306.11507","author":"Huang Yue","year":"2023","unstructured":"Yue Huang, Qihui Zhang, Lichao Sun, et al., 2023. Trustgpt: A benchmark for trustworthy and responsible large language models. arXiv preprint arXiv:2306.11507 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Multimodal bias: Introducing a framework for stereotypical bias assessment beyond gender and race in vision language models. arXiv preprint arXiv:2303.12734","author":"Janghorbani Sepehr","year":"2023","unstructured":"Sepehr Janghorbani and Gerard De Melo. 2023. Multimodal bias: Introducing a framework for stereotypical bias assessment beyond gender and race in vision language models. arXiv preprint arXiv:2303.12734 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"Wenhao Zheng, Yiyang Zhou, Yifan Mai, Josselin Somerville Roberts, Michihiro Yasunaga, Huaxiu Yao, Cihang Xie, et al.","author":"Lee Tony","year":"2024","unstructured":"Tony Lee, Haoqin Tu, Chi Heem Wong, Wenhao Zheng, Yiyang Zhou, Yifan Mai, Josselin Somerville Roberts, Michihiro Yasunaga, Huaxiu Yao, Cihang Xie, et al., 2024. VHELM: A Holistic Evaluation of Vision Language Models. arXiv preprint arXiv:2410.07112 (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"Weng Lam Tam, et al","author":"Liu Xiao","year":"2023","unstructured":"Xiao Liu, Xuanyu Lei, Shengyuan Wang, Yue Huang, Zhuoer Feng, Bosi Wen, Jiale Cheng, Pei Ke, Yifan Xu, Weng Lam Tam, et al., 2023. Alignbench: Benchmarking chinese alignment of large language models. arXiv preprint arXiv:2311.18743 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Sora: A review on background, technology, limitations, and opportunities of large vision models. arXiv preprint arXiv:2402.17177","author":"Liu Yixin","year":"2024","unstructured":"Yixin Liu, Kai Zhang, Yuan Li, Zhiling Yan, Chujie Gao, Ruoxi Chen, Zhengqing Yuan, Yue Huang, Hanchi Sun, Jianfeng Gao, et al., 2024. Sora: A review on background, technology, limitations, and opportunities of large vision models. arXiv preprint arXiv:2402.17177 (2024)."},{"key":"e_1_3_2_1_17_1","unstructured":"Meta. 2024. Llama 3.2: Revolutionizing edge AI and vision with open customizable models. https:\/\/ai.meta.com\/blog\/llama-3-2-connect-2024-vision-edge-mobile-devices\/"},{"key":"e_1_3_2_1_18_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Mu Yao","year":"2024","unstructured":"Yao Mu, Qinglong Zhang, Mengkang Hu, Wenhai Wang, Mingyu Ding, Jun Jin, Bin Wang, Jifeng Dai, Yu Qiao, and Ping Luo. 2024. Embodiedgpt: Vision-language pre-training via embodied chain of thought. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.416"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.154"},{"key":"e_1_3_2_1_21_1","unstructured":"OpenAI. 2024. GPT-4O Mini: Advancing Cost-Efficient Intelligence. https:\/\/openai.com\/index\/gpt-4o-mini-advancing-cost-efficient-intelligence\/. Accessed: 2024-09-04."},{"key":"e_1_3_2_1_22_1","unstructured":"OpenAI. 2024. GPT-4o System Card. https:\/\/openai.com\/index\/gpt-4o-system-card\/ Accessed: 2024-09-02."},{"key":"e_1_3_2_1_23_1","unstructured":"Alexis Roger. 2024. Training large multimodal language models with ethical values. (2024)."},{"key":"e_1_3_2_1_24_1","volume-title":"Towards ethical multimodal systems. arXiv preprint arXiv:2304.13765","author":"Roger Alexis","year":"2023","unstructured":"Alexis Roger, Esma A\u00efmeur, and Irina Rish. 2023. Towards ethical multimodal systems. arXiv preprint arXiv:2304.13765 (2023)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01348"},{"key":"e_1_3_2_1_26_1","volume-title":"A unified framework and dataset for assessing gender bias in vision-language models. arXiv preprint arXiv:2402.13636","author":"Sathe Ashutosh","year":"2024","unstructured":"Ashutosh Sathe, Prachi Jain, and Sunayana Sitaram. 2024. A unified framework and dataset for assessing gender bias in vision-language models. arXiv preprint arXiv:2402.13636 (2024)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00659"},{"key":"e_1_3_2_1_28_1","volume-title":"Assessment of multimodal large language models in alignment with human values. arXiv preprint arXiv:2403.17830","author":"Shi Zhelun","year":"2024","unstructured":"Zhelun Shi, Zhipin Wang, Hongxing Fan, Zaibin Zhang, Lijun Li, Yongting Zhang, Zhenfei Yin, Lu Sheng, Yu Qiao, and Jing Shao. 2024. Assessment of multimodal large language models in alignment with human values. arXiv preprint arXiv:2403.17830 (2024)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01319"},{"key":"e_1_3_2_1_30_1","volume-title":"Sight Beyond Text: Multi-Modal Training Enhances LLMs in Truthfulness and Ethics. In NeurIPS 2023 Workshop on Instruction Tuning and Instruction Following.","author":"Tu Haoqin","year":"2023","unstructured":"Haoqin Tu, Bingchen Zhao, Chen Wei, and Cihang Xie. 2023. Sight Beyond Text: Multi-Modal Training Enhances LLMs in Truthfulness and Ethics. In NeurIPS 2023 Workshop on Instruction Tuning and Instruction Following."},{"key":"e_1_3_2_1_31_1","volume-title":"Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution. arXiv preprint arXiv:2409.12191","author":"Wang Peng","year":"2024","unstructured":"Peng Wang, Shuai Bai, Sinan Tan, Shijie Wang, Zhihao Fan, Jinze Bai, Keqin Chen, Xuejing Liu, Jialin Wang, Wenbin Ge, Yang Fan, Kai Dang, Mengfei Du, Xuancheng Ren, Rui Men, Dayiheng Liu, Chang Zhou, Jingren Zhou, and Junyang Lin. 2024a. Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution. arXiv preprint arXiv:2409.12191 (2024)."},{"key":"e_1_3_2_1_32_1","volume-title":"ACM Multimedia","author":"Wang Zecheng","year":"2024","unstructured":"Zecheng Wang, Xinye Li, Zhanyue Qin, Chunshan Li, Zhiying Tu, Dianhui Chu, and Dianbo Sui. 2024b. Can We Debiase Multimodal Large Language Models via Model Editing?. In ACM Multimedia 2024. https:\/\/openreview.net\/forum?id=ybqqGTWuhj"},{"key":"e_1_3_2_1_33_1","volume-title":"Benchmarking Gender Bias in Vision Language Models via Counterfactual Probing. arXiv preprint arXiv:2407.00600","author":"Xiao Yisong","year":"2024","unstructured":"Yisong Xiao, Aishan Liu, QianJia Cheng, Zhenfei Yin, Siyuan Liang, Jiapeng Li, Jing Shao, Xianglong Liu, and Dacheng Tao. 2024. GenderBias-emph {VL}: Benchmarking Gender Bias in Vision Language Models via Counterfactual Probing. arXiv preprint arXiv:2407.00600 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"Benchmarking Trustworthiness of Multimodal Large Language Models: A Comprehensive Study. ArXiv","author":"Zhang Yichi","year":"2024","unstructured":"Yichi Zhang, Yao Huang, Yitong Sun, Chang Liu, Zhe Zhao, Zhengwei Fang, Yifan Wang, Huanran Chen, Xiao Yang, Xingxing Wei, Hang Su, Yinpeng Dong, and Jun Zhu. 2024. Benchmarking Trustworthiness of Multimodal Large Language Models: A Comprehensive Study. ArXiv, Vol. abs\/2406.07057 (2024). https:\/\/api.semanticscholar.org\/CorpusID:270379776"},{"key":"e_1_3_2_1_35_1","first-page":"46595","article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","volume":"36","author":"Zheng Lianmin","year":"2023","unstructured":"Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zi Lin, Zhuohan Li, Dacheng Li, Eric Xing, et al., 2023. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in Neural Information Processing Systems, Vol. 36 (2023), 46595-46623.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","volume-title":"Vlstereoset: A study of stereotypical bias in pre-trained vision-language models","author":"Zhou Kankan","year":"2022","unstructured":"Kankan Zhou, Yibin LAI, and Jing Jiang. 2022. Vlstereoset: A study of stereotypical bias in pre-trained vision-language models. Association for Computational Linguistics."}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3760884","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:15:10Z","timestamp":1765502110000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3760884"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":36,"alternative-id":["10.1145\/3746252.3760884","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3760884","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}