{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:20:43Z","timestamp":1773246043598,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Science Foundation of China","award":["62376116, 62176120"],"award-info":[{"award-number":["62376116, 62176120"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681251","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"10707-10715","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Hallu-PI: Evaluating Hallucination in Multi-modal Large Language Models within Perturbed Inputs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7814-6606","authenticated-orcid":false,"given":"Peng","family":"Ding","sequence":"first","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2586-330X","authenticated-orcid":false,"given":"Jingyu","family":"Wu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8189-9449","authenticated-orcid":false,"given":"Jun","family":"Kuang","sequence":"additional","affiliation":[{"name":"Meituan, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9701-2750","authenticated-orcid":false,"given":"Dan","family":"Ma","sequence":"additional","affiliation":[{"name":"Meituan, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7044-1341","authenticated-orcid":false,"given":"Xuezhi","family":"Cao","sequence":"additional","affiliation":[{"name":"Meituan, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0685-9556","authenticated-orcid":false,"given":"Xunliang","family":"Cai","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3577-5725","authenticated-orcid":false,"given":"Shi","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang-Singapore Innovation and AI Joint Research Lab, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9859-6898","authenticated-orcid":false,"given":"Jiajun","family":"Chen","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4869-0832","authenticated-orcid":false,"given":"Shujian","family":"Huang","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","first-page":"23716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac Jean-Baptiste","year":"2022","unstructured":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al. 2022. Flamingo: a visual language model for few-shot learning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 23716--23736.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_3_1","volume-title":"Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, Shijie Wang, Sinan Tan, Peng Wang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966 (2023)."},{"key":"e_1_3_2_1_4_1","volume-title":"Natural language processing with Python: analyzing text with the natural language toolkit. \" O'Reilly Media","author":"Bird Steven","unstructured":"Steven Bird, Ewan Klein, and Edward Loper. 2009. Natural language processing with Python: analyzing text with the natural language toolkit. \" O'Reilly Media, Inc.\"."},{"key":"e_1_3_2_1_5_1","volume-title":"Minigpt-v2: large language model as a unified interface for vision-language multi-task learning. arXiv preprint arXiv:2310.09478","author":"Chen Jun","year":"2023","unstructured":"Jun Chen, Deyao Zhu, Xiaoqian Shen, Xiang Li, Zechun Liu, Pengchuan Zhang, Raghuraman Krishnamoorthi, Vikas Chandra, Yunyang Xiong, and Mohamed Elhoseiny. 2023. Minigpt-v2: large language model as a unified interface for vision-language multi-task learning. arXiv preprint arXiv:2310.09478 (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Red Teaming GPT-4V: Are GPT-4V Safe Against Uni\/Multi-Modal Jailbreak Attacks? arXiv preprint arXiv:2404.03411","author":"Chen Shuo","year":"2024","unstructured":"Shuo Chen, Zhen Han, Bailan He, Zifeng Ding, Wenqian Yu, Philip Torr, Volker Tresp, and Jindong Gu. 2024. Red Teaming GPT-4V: Are GPT-4V Safe Against Uni\/Multi-Modal Jailbreak Attacks? arXiv preprint arXiv:2404.03411 (2024)."},{"key":"e_1_3_2_1_7_1","volume-title":"A Wolf in Sheep's Clothing: Generalized Nested Jailbreak Prompts can Fool Large Language Models Easily. arXiv preprint arXiv:2311.08268","author":"Ding Peng","year":"2023","unstructured":"Peng Ding, Jun Kuang, Dan Ma, Xuezhi Cao, Yunsen Xian, Jiajun Chen, and Shujian Huang. 2023. A Wolf in Sheep's Clothing: Generalized Nested Jailbreak Prompts can Fool Large Language Models Easily. arXiv preprint arXiv:2311.08268 (2023)."},{"key":"e_1_3_2_1_8_1","volume-title":"A survey on in-context learning. arXiv preprint arXiv:2301.00234","author":"Dong Qingxiu","year":"2022","unstructured":"Qingxiu Dong, Lei Li, Damai Dai, Ce Zheng, Zhiyong Wu, Baobao Chang, Xu Sun, Jingjing Xu, and Zhifang Sui. 2022. A survey on in-context learning. arXiv preprint arXiv:2301.00234 (2022)."},{"key":"e_1_3_2_1_9_1","volume-title":"Corey Lynch, Aakanksha Chowdhery, Brian Ichter, Ayzaan Wahid, Jonathan Tompson, Quan Vuong, Tianhe Yu, et al.","author":"Driess Danny","year":"2023","unstructured":"Danny Driess, Fei Xia, Mehdi SM Sajjadi, Corey Lynch, Aakanksha Chowdhery, Brian Ichter, Ayzaan Wahid, Jonathan Tompson, Quan Vuong, Tianhe Yu, et al. 2023. Palm-e: An embodied multimodal language model. arXiv preprint arXiv:2303.03378 (2023)."},{"key":"e_1_3_2_1_10_1","volume-title":"Glm: General language model pretraining with autoregressive blank infilling. arXiv preprint arXiv:2103.10360","author":"Du Zhengxiao","year":"2021","unstructured":"Zhengxiao Du, Yujie Qian, Xiao Liu, Ming Ding, Jiezhong Qiu, Zhilin Yang, and Jie Tang. 2021. Glm: General language model pretraining with autoregressive blank infilling. arXiv preprint arXiv:2103.10360 (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"Mme: A comprehensive evaluation benchmark for multimodal large language models. arXiv preprint arXiv:2306.13394","author":"Fu Chaoyou","year":"2023","unstructured":"Chaoyou Fu, Peixian Chen, Yunhang Shen, Yulei Qin, Mengdan Zhang, Xu Lin, Jinrui Yang, Xiawu Zheng, Ke Li, Xing Sun, et al. 2023. Mme: A comprehensive evaluation benchmark for multimodal large language models. arXiv preprint arXiv:2306.13394 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"ImageNet-trained CNNs are biased towards texture","author":"Geirhos Robert","year":"1811","unstructured":"Robert Geirhos, Patricia Rubisch, Claudio Michaelis, Matthias Bethge, Felix A Wichmann, and Wieland Brendel. 2018. ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. arXiv preprint arXiv:1811.12231 (2018)."},{"key":"e_1_3_2_1_13_1","volume-title":"Multimodal-gpt: A vision and language model for dialogue with humans. arXiv preprint arXiv:2305.04790","author":"Gong Tao","year":"2023","unstructured":"Tao Gong, Chengqi Lyu, Shilong Zhang, Yudong Wang, Miao Zheng, Qian Zhao, Kuikun Liu, Wenwei Zhang, Ping Luo, and Kai Chen. 2023. Multimodal-gpt: A vision and language model for dialogue with humans. arXiv preprint arXiv:2305.04790 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"Detecting and preventing hallucinations in large vision language models. arXiv preprint arXiv:2308.06394","author":"Gunjal Anisha","year":"2023","unstructured":"Anisha Gunjal, Jihan Yin, and Erhan Bas. 2023. Detecting and preventing hallucinations in large vision language models. arXiv preprint arXiv:2308.06394 (2023)."},{"key":"e_1_3_2_1_15_1","volume-title":"Benchmarking neural network robustness to common corruptions and perturbations. arXiv preprint arXiv:1903.12261","author":"Hendrycks Dan","year":"2019","unstructured":"Dan Hendrycks and Thomas Dietterich. 2019. Benchmarking neural network robustness to common corruptions and perturbations. arXiv preprint arXiv:1903.12261 (2019)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Lei Huang Weijiang Yu Weitao Ma Weihong Zhong Zhangyin Feng Haotian Wang Qianglong Chen Weihua Peng Xiaocheng Feng Bing Qin et al. 2023. A survey on hallucination in large language models: Principles taxonomy challenges and open questions. arXiv preprint arXiv:2311.05232 (2023).","DOI":"10.1145\/3703155"},{"key":"e_1_3_2_1_17_1","volume-title":"OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation. arXiv preprint arXiv:2311.17911","author":"Huang Qidong","year":"2023","unstructured":"Qidong Huang, Xiaoyi Dong, Pan Zhang, Bin Wang, Conghui He, Jiaqi Wang, Dahua Lin, Weiming Zhang, and Nenghai Yu. 2023. OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation. arXiv preprint arXiv:2311.17911 (2023)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"e_1_3_2_1_19_1","volume-title":"Seed-bench: Benchmarking multimodal llms with generative comprehension. arXiv preprint arXiv:2307.16125","author":"Li Bohao","year":"2023","unstructured":"Bohao Li, Rui Wang, Guangzhi Wang, Yuying Ge, Yixiao Ge, and Ying Shan. 2023. Seed-bench: Benchmarking multimodal llms with generative comprehension. arXiv preprint arXiv:2307.16125 (2023)."},{"key":"e_1_3_2_1_20_1","volume-title":"Jian-Yun Nie, and Ji-Rong Wen.","author":"Li Junyi","year":"2024","unstructured":"Junyi Li, Jie Chen, Ruiyang Ren, Xiaoxue Cheng, Wayne Xin Zhao, Jian-Yun Nie, and Ji-Rong Wen. 2024. The Dawn After the Dark: An Empirical Study on Factuality Hallucination in Large Language Models. arXiv preprint arXiv:2401.03205 (2024)."},{"key":"e_1_3_2_1_21_1","volume-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)."},{"key":"e_1_3_2_1_22_1","volume-title":"Wayne Xin Zhao, and Ji-Rong Wen","author":"Li Yifan","year":"2023","unstructured":"Yifan Li, Yifan Du, Kun Zhou, Jinpeng Wang, Wayne Xin Zhao, and Ji-Rong Wen. 2023. Evaluating object hallucination in large vision-language models. arXiv preprint arXiv:2305.10355 (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"2023 d. Aligning Large Multi-Modal Model with Robust Instruction Tuning. arXiv preprint arXiv:2306.14565","author":"Liu Fuxiao","year":"2023","unstructured":"Fuxiao Liu, Kevin Lin, Linjie Li, Jianfeng Wang, Yaser Yacoob, and Lijuan Wang. 2023 d. Aligning Large Multi-Modal Model with Robust Instruction Tuning. arXiv preprint arXiv:2306.14565 (2023)."},{"key":"e_1_3_2_1_24_1","volume-title":"Improved baselines with visual instruction tuning. arXiv preprint arXiv:2310.03744","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Yuheng Li, and Yong Jae Lee. 2023. Improved baselines with visual instruction tuning. arXiv preprint arXiv:2310.03744 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"Visual instruction tuning. arXiv preprint arXiv:2304.08485","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual instruction tuning. arXiv preprint arXiv:2304.08485 (2023)."},{"key":"e_1_3_2_1_26_1","volume-title":"Mmbench: Is your multi-modal model an all-around player? arXiv preprint arXiv:2307.06281","author":"Liu Yuan","year":"2023","unstructured":"Yuan Liu, Haodong Duan, Yuanhan Zhang, Bo Li, Songyang Zhang, Wangbo Zhao, Yike Yuan, Jiaqi Wang, Conghui He, Ziwei Liu, et al. 2023. Mmbench: Is your multi-modal model an all-around player? arXiv preprint arXiv:2307.06281 (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"Benchmarking robustness in object detection: Autonomous driving when winter is coming. arXiv preprint arXiv:1907.07484","author":"Michaelis Claudio","year":"2019","unstructured":"Claudio Michaelis, Benjamin Mitzkus, Robert Geirhos, Evgenia Rusak, Oliver Bringmann, Alexander S Ecker, Matthias Bethge, and Wieland Brendel. 2019. Benchmarking robustness in object detection: Autonomous driving when winter is coming. arXiv preprint arXiv:1907.07484 (2019)."},{"key":"e_1_3_2_1_28_1","volume-title":"Benchmarking Robustness of Multimodal Image-Text Models under Distribution Shift. Journal of Data-centric Machine Learning Research","author":"Qiu Jielin","year":"2023","unstructured":"Jielin Qiu, Yi Zhu, Xingjian Shi, Florian Wenzel, Zhiqiang Tang, Ding Zhao, Bo Li, and Mu Li. 2023. Benchmarking Robustness of Multimodal Image-Text Models under Distribution Shift. Journal of Data-centric Machine Learning Research (2023)."},{"key":"e_1_3_2_1_29_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Yonghui Wu Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth et al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Eyes wide shut? exploring the visual shortcomings of multimodal llms. arXiv preprint arXiv:2401.06209","author":"Tong Shengbang","year":"2024","unstructured":"Shengbang Tong, Zhuang Liu, Yuexiang Zhai, Yi Ma, Yann LeCun, and Saining Xie. 2024. Eyes wide shut? exploring the visual shortcomings of multimodal llms. arXiv preprint arXiv:2401.06209 (2024)."},{"key":"e_1_3_2_1_31_1","volume-title":"An llm-free multi-dimensional benchmark for mllms hallucination evaluation. arXiv preprint arXiv:2311.07397","author":"Wang Junyang","year":"2023","unstructured":"Junyang Wang, Yuhang Wang, Guohai Xu, Jing Zhang, Yukai Gu, Haitao Jia, Ming Yan, Ji Zhang, and Jitao Sang. 2023. An llm-free multi-dimensional benchmark for mllms hallucination evaluation. arXiv preprint arXiv:2311.07397 (2023)."},{"key":"e_1_3_2_1_32_1","unstructured":"Junyang Wang Yiyang Zhou Guohai Xu Pengcheng Shi Chenlin Zhao Haiyang Xu Qinghao Ye Ming Yan Ji Zhang Jihua Zhu et al. 2023. Evaluation and analysis of hallucination in large vision-language models. arXiv preprint arXiv:2308.15126 (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"Cogvlm: Visual expert for pretrained language models. arXiv preprint arXiv:2311.03079","author":"Wang Weihan","year":"2023","unstructured":"Weihan Wang, Qingsong Lv, Wenmeng Yu, Wenyi Hong, Ji Qi, Yan Wang, Junhui Ji, Zhuoyi Yang, Lei Zhao, Xixuan Song, et al. 2023. Cogvlm: Visual expert for pretrained language models. arXiv preprint arXiv:2311.03079 (2023)."},{"key":"e_1_3_2_1_34_1","unstructured":"Fangzhao Wu Yueqi Xie Jingwei Yi Jiawei Shao Justin Curl Lingjuan Lyu Qifeng Chen and Xing Xie. 2023. Defending chatgpt against jailbreak attack via self-reminder. (2023)."},{"key":"e_1_3_2_1_35_1","volume-title":"Cognitive mirage: A review of hallucinations in large language models. arXiv preprint arXiv:2309.06794","author":"Ye Hongbin","year":"2023","unstructured":"Hongbin Ye, Tong Liu, Aijia Zhang, Wei Hua, and Weiqiang Jia. 2023. Cognitive mirage: A review of hallucinations in large language models. arXiv preprint arXiv:2309.06794 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"mplug-owl2: Revolutionizing multi-modal large language model with modality collaboration. arXiv preprint arXiv:2311.04257","author":"Ye Qinghao","year":"2023","unstructured":"Qinghao Ye, Haiyang Xu, Jiabo Ye, Ming Yan, Haowei Liu, Qi Qian, Ji Zhang, Fei Huang, and Jingren Zhou. 2023. mplug-owl2: Revolutionizing multi-modal large language model with modality collaboration. arXiv preprint arXiv:2311.04257 (2023)."},{"key":"e_1_3_2_1_37_1","volume-title":"Woodpecker: Hallucination correction for multimodal large language models. arXiv preprint arXiv:2310.16045","author":"Yin Shukang","year":"2023","unstructured":"Shukang Yin, Chaoyou Fu, Sirui Zhao, Tong Xu, Hao Wang, Dianbo Sui, Yunhang Shen, Ke Li, Xing Sun, and Enhong Chen. 2023. Woodpecker: Hallucination correction for multimodal large language models. arXiv preprint arXiv:2310.16045 (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"HallE-Switch: Rethinking and Controlling Object Existence Hallucinations in Large Vision Language Models for Detailed Caption. arXiv preprint arXiv:2310.01779","author":"Zhai Bohan","year":"2023","unstructured":"Bohan Zhai, Shijia Yang, Xiangchen Zhao, Chenfeng Xu, Sheng Shen, Dongdi Zhao, Kurt Keutzer, Manling Li, Tan Yan, and Xiangjun Fan. 2023. HallE-Switch: Rethinking and Controlling Object Existence Hallucinations in Large Vision Language Models for Detailed Caption. arXiv preprint arXiv:2310.01779 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"Yuhang Cao, Chao Xu, Linke Ouyang, Zhiyuan Zhao, Shuangrui Ding, Songyang Zhang, Haodong Duan, Hang Yan, et al.","author":"Zhang Pan","year":"2023","unstructured":"Pan Zhang, Xiaoyi Dong Bin Wang, Yuhang Cao, Chao Xu, Linke Ouyang, Zhiyuan Zhao, Shuangrui Ding, Songyang Zhang, Haodong Duan, Hang Yan, et al. 2023. Internlm-xcomposer: A vision-language large model for advanced text-image comprehension and composition. arXiv preprint arXiv:2309.15112 (2023)."},{"key":"e_1_3_2_1_40_1","volume-title":"Analyzing and mitigating object hallucination in large vision-language models. arXiv preprint arXiv:2310.00754","author":"Zhou Yiyang","year":"2023","unstructured":"Yiyang Zhou, Chenhang Cui, Jaehong Yoon, Linjun Zhang, Zhun Deng, Chelsea Finn, Mohit Bansal, and Huaxiu Yao. 2023. Analyzing and mitigating object hallucination in large vision-language models. arXiv preprint arXiv:2310.00754 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681251","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681251","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:42Z","timestamp":1750295862000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681251"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":41,"alternative-id":["10.1145\/3664647.3681251","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681251","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}