{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:55:30Z","timestamp":1781538930563,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810694","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1749-1758","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Agent-Based Query Reformulation: Simulating Feedback and Mitigating Negation Blindness in Interactive Image Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0298-0905","authenticated-orcid":false,"given":"Hongyi","family":"Zhu","sequence":"first","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1595-3619","authenticated-orcid":false,"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7943-2591","authenticated-orcid":false,"given":"Jia-Hong","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8447-872X","authenticated-orcid":false,"given":"Yixian","family":"Shen","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1904-8736","authenticated-orcid":false,"given":"Stevan","family":"Rudinac","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8312-0694","authenticated-orcid":false,"given":"Evangelos","family":"Kanoulas","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612664"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Gianni Amati and C\u00a0J\u00a0Van Rijsbergen. 2002. Probabilistic models of information retrieval based on measuring the divergence from randomness. ACM Trans. Inf. Syst. 20 (2002) 357\u2013389.","DOI":"10.1145\/582415.582416"},{"key":"e_1_3_3_2_4_2","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et\u00a0al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.13923 (2025)."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02080"},{"key":"e_1_3_3_2_6_2","unstructured":"Pia Borlund. 2003. The IIR evaluation model: a framework for evaluation of interactive information retrieval systems. Inf. Res. 8 (2003)."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Alvaro Casado-Coscolla Carlos Sanchez-Belenguer Erik Wolfart Carlos Angorrilla-Bustamante and Vitor Sequeira. 2024. Active learning for image retrieval via visual similarity metrics and semantic features. Engineering Applications of Artificial Intelligence 138 (2024) 109239.","DOI":"10.1016\/j.engappai.2024.109239"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450127"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00307"},{"key":"e_1_3_3_2_10_2","volume-title":"The Thirty-ninth Annual Conference on Neural Information Processing Systems","author":"Feng Yanglin","year":"2025","unstructured":"Yanglin Feng, Yongxiang Li, Yuan Sun, Yang Qin, Dezhong Peng, and Peng Hu. 2025. Interactive Cross-modal Learning for Text-3D Scene Retrieval. In The Thirty-ninth Annual Conference on Neural Information Processing Systems."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.194"},{"key":"e_1_3_3_2_12_2","unstructured":"Xiaoxiao Guo Hui Wu Yu Cheng Steven Rennie Gerald Tesauro and Rogerio Feris. 2018. Dialog-based interactive image retrieval. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_3_2_13_2","unstructured":"Xiaoxiao Guo Hui Wu Yupeng Gao Steven Rennie and Rogerio Feris. 2019. The fashion iq dataset: Retrieving images by combining side information and relative natural language feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1905.12794 1 2 (2019) 7."},{"key":"e_1_3_3_2_14_2","unstructured":"Donghoon Han Eunhwan Park Gisang Lee Adam Lee and Nojun Kwak. 2024. Merlin: Multimodal embedding refinement via llm-based iterative navigation for text-video retrieval-rerank pipeline. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.12508 (2024)."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.163"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS52108.2023.10281811"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-98355-0_43"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i23.34587"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-2071-5_30"},{"key":"e_1_3_3_2_20_2","unstructured":"Ting Jiang Minghui Song Zihan Zhang Haizhen Huang Weiwei Deng Feng Sun Qi Zhang Deqing Wang and Fuzhen Zhuang. 2024. E5-v: Universal embeddings with multimodal large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.12580 (2024)."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643489.3661132"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-53302-0_31"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","unstructured":"Christoph Kofler Martha Larson and Alan Hanjalic. 2016. User Intent in Multimedia Search: A Survey of the State of the Art and Future Challenges. ACM Comput. Surv. 49 2 Article 36 (Aug. 2016) 37\u00a0pages. 10.1145\/2954930","DOI":"10.1145\/2954930"},{"key":"e_1_3_3_2_24_2","unstructured":"Saehyung Lee Sangwon Yu Junsung Park Jihun Yi and Sungroh Yoon. 2024. Interactive text-to-image retrieval with large language models: A plug-and-play approach. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.03411 (2024)."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Matan Levy Rami Ben-Ari Nir Darshan and Dani Lischinski. 2023. Chatting makes perfect: Chat-based image retrieval. Advances in Neural Information Processing Systems 36 (2023) 61437\u201361449.","DOI":"10.52202\/075280-2684"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i4.28081"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"crossref","unstructured":"Wei-Chao Lin. 2019. Aggregation of multiple pseudo relevance feedbacks for image search re-ranking. IEEE Access 7 (2019) 147553\u2013147559.","DOI":"10.1109\/ACCESS.2019.2942142"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.542"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00213"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-27077-2_50"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3729950"},{"key":"e_1_3_3_2_33_2","unstructured":"Xiaopeng Lu Tiancheng Zhao and Kyusong Lee. 2021. VisualSparta: an embarrassingly simple approach to large-scale text-to-image search with weighted bag-of-words. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2101.00265 (2021)."},{"key":"e_1_3_3_2_34_2","unstructured":"Yiding Lu Mouxing Yang Dezhong Peng Peng Hu Yijie Lin and Xi Peng. 2025. LLaVA-ReID: Selective Multi-image Questioner for Interactive Person Re-Identification. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.10174 (2025)."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591992"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Nicola Messina Giuseppe Amato Andrea Esuli Fabrizio Falchi Claudio Gennaro and St\u00e9phane Marchand-Maillet. 2021. Fine-grained visual textual alignment for cross-modal retrieval using transformer encoders. ACM Transactions on Multimedia Computing Communications and Applications (TOMM) 17 4 (2021) 1\u201323.","DOI":"10.1145\/3451390"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.363"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.63"},{"key":"e_1_3_3_2_39_2","unstructured":"Jaimie\u00a0Yejean Park Neil O\u2019Hare Rossano Schifanella Alejandro Jaimes and Chin-Wan Chung. 2015. A Large-Scale Study of User Image Search Behavior on the Web. Proceedings of the 33rd Annual ACM Conference on Human Factors in Computing Systems (2015). https:\/\/api.semanticscholar.org\/CorpusID:6798370"},{"key":"e_1_3_3_2_40_2","volume-title":"Forty-second International Conference on Machine Learning","author":"Pu Ruitao","unstructured":"Ruitao Pu, Yang Qin, Xiaomin Song, Dezhong Peng, Zhenwen Ren, and Yuan Sun. [n. d.]. SHE: Streaming-media Hashing Retrieval. In Forty-second International Conference on Machine Learning."},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01342"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-2074-6_46"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643489.3661123"},{"key":"e_1_3_3_2_44_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"crossref","unstructured":"Rafael Rafailov Archit Sharma Eric Mitchell Christopher\u00a0D Manning Stefano Ermon and Chelsea Finn. 2023. Direct preference optimization: Your language model is secretly a reward model. Advances in neural information processing systems 36 (2023) 53728\u201353741.","DOI":"10.52202\/075280-2338"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"crossref","unstructured":"Stephen\u00a0E. Robertson. 1991. On Term Selection for Query Expansion. J. Documentation 46 (1991) 359\u2013364.","DOI":"10.1108\/eb026866"},{"key":"e_1_3_3_2_47_2","unstructured":"J.\u00a0J. Rocchio. 1971. Relevance feedback in information retrieval."},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"crossref","unstructured":"Luca Rossetto Ralph Gasser Jakub Loko\u010d Werner Bailer Klaus Schoeffmann Bernd Muenzer Tom\u00e1\u0161 Sou\u010dek Phuong\u00a0Anh Nguyen Paolo Bolettieri Andreas Leibetseder et\u00a0al. 2020. Interactive video retrieval in the age of deep learning\u2013detailed evaluation of VBS 2019. IEEE transactions on multimedia 23 (2020) 243\u2013256.","DOI":"10.1109\/TMM.2020.2980944"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.1006"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.522"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","unstructured":"A.W.M. Smeulders M. Worring S. Santini A. Gupta and R. Jain. 2000. Content-based image retrieval at the end of the early years. IEEE Transactions on Pattern Analysis and Machine Intelligence 22 12 (2000) 1349\u20131380. 10.1109\/34.895972","DOI":"10.1109\/34.895972"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i19.34271"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01343"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"crossref","unstructured":"Ly-Duyen Tran Manh-Duy Nguyen Duc-Tien Dang-Nguyen Silvan Heller Florian Spiess Jakub Loko\u010d Ladislav Pe\u0161ka Thao-Nhu Nguyen Omar\u00a0Shahbaz Khan Aaron Duane et\u00a0al. 2023. Comparing interactive retrieval approaches at the lifelog search challenge 2021. IEEE Access 11 (2023) 30982\u201330995.","DOI":"10.1109\/ACCESS.2023.3248284"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-98355-0_55"},{"key":"e_1_3_3_2_56_2","unstructured":"Songjun Tu Jiahao Lin Xiangyu Tian Qichao Zhang Linjing Li Yuqian Fu Nan Xu Wei He Xiangyuan Lan Dongmei Jiang et\u00a0al. 2025. Enhancing LLM Reasoning with Iterative DPO: A Comprehensive Empirical Investigation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.12854 (2025)."},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548263"},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3755673"},{"key":"e_1_3_3_2_59_2","unstructured":"Shuai Wang Hongyi Zhu Jia-Hong Huang Yixian Shen Chengxi Zeng Stevan Rudinac Monika Kackovic Nachoem Wijnberg and Marcel Worring. 2026. A-MAR: Agent-based Multimodal Art Retrieval for Fine-Grained Artwork Understanding. arxiv:https:\/\/arXiv.org\/abs\/2604.19689\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2604.19689"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01115"},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/3543829.3543837"},{"key":"e_1_3_3_2_62_2","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv et\u00a0al. 2025. Qwen3 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.09388 (2025)."},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"crossref","unstructured":"Peter Young Alice Lai Micah Hodosh and Julia Hockenmaier. 2014. From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions. Transactions of the Association for Computational Linguistics 2 (2014) 67\u201378.","DOI":"10.1162\/tacl_a_00166"},{"key":"e_1_3_3_2_64_2","unstructured":"Dong Yun Marco Schouten and Dim Papadopoulos. 2025. Sherlock Your Queries: Learning to Ask the Right Questions for Dialogue-Based Retrieval. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2510.18659 (2025)."},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/502585.502654"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"crossref","unstructured":"Tianhua Zhang Kun Li Hongyin Luo Xixin Wu James Glass and Helen Meng. 2024. Adaptive query rewriting: Aligning rewriters through marginal probability of conversational answers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.10991 (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.746"},{"key":"e_1_3_3_2_67_2","unstructured":"Xin Zhang Yanzhao Zhang Wen Xie Mingxin Li Ziqi Dai Dingkun Long Pengjun Xie Meishan Zhang Wenjie Li and Min Zhang. 2024. GME: Improving Universal Multimodal Retrieval by Multimodal LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.16855 (2024)."},{"key":"e_1_3_3_2_68_2","unstructured":"Yaowei Zheng Richong Zhang Junhao Zhang Yanhan Ye Zheyan Luo Zhangchi Feng and Yongqiang Ma. 2024. Llamafactory: Unified efficient fine-tuning of 100+ language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.13372 (2024)."},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"publisher","DOI":"10.1145\/3652583.3658032"},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"crossref","unstructured":"Hongyi Zhu Jia-Hong Huang Yixian Shen Stevan Rudinac and Evangelos Kanoulas. 2025. Interactive Image Retrieval Meets Query Rewriting with Large Language and Vision Language Models. ACM Trans. Multimedia Comput. Commun. Appl. (2025).","DOI":"10.1145\/3744910"},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3592047"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:13:44Z","timestamp":1781536424000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810694"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":70,"alternative-id":["10.1145\/3805622.3810694","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810694","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}