{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:54:40Z","timestamp":1781538880015,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Key Science and Technology Innovation Program of the Chinese Academy of Sciences","award":["KGFZD-145-26-44"],"award-info":[{"award-number":["KGFZD-145-26-44"]}]},{"name":"the Key Research and Development Program of Xinjiang Uyghur Autonomous Region","award":["2023B03024"],"award-info":[{"award-number":["2023B03024"]}]},{"name":"Tianshan Talent Training Program","award":["2023TSYCCX0041"],"award-info":[{"award-number":["2023TSYCCX0041"]}]},{"name":"the Key Research and Development Program of Xinjiang Uyghur Autonomous Region","award":["2024B03026"],"award-info":[{"award-number":["2024B03026"]}]},{"name":"the Youth Talents Support Project of Xinjiang Uyghur Autonomous Region","award":["2023TSYCQNTJ0037"],"award-info":[{"award-number":["2023TSYCQNTJ0037"]}]},{"name":"Xinjiang Leading Talents Introduction Program","award":["XJRC-2025-KJ-YJ-CXPT-070"],"award-info":[{"award-number":["XJRC-2025-KJ-YJ-CXPT-070"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810597","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1740-1748","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Ask-to-Retrieve: VQA-Guided Information-Gain Question Selection for Interactive Text-to-Image Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4079-0349","authenticated-orcid":false,"given":"Shuaiwei","family":"Xie","sequence":"first","affiliation":[{"name":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi, China; University of Chinese Academy of Sciences, Beijing, China and Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2639-3944","authenticated-orcid":false,"given":"Yating","family":"Yang","sequence":"additional","affiliation":[{"name":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi, China; University of Chinese Academy of Sciences, Beijing, China and Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3082-648X","authenticated-orcid":false,"given":"Bo","family":"Ma","sequence":"additional","affiliation":[{"name":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi, China; University of Chinese Academy of Sciences, Beijing, China and Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2867-1765","authenticated-orcid":false,"given":"Xi","family":"Zhou","sequence":"additional","affiliation":[{"name":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi, China; University of Chinese Academy of Sciences, Beijing, China and Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9285-3040","authenticated-orcid":false,"given":"Zhen","family":"Wang","sequence":"additional","affiliation":[{"name":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi, China; University of Chinese Academy of Sciences, Beijing, China and Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6660-8989","authenticated-orcid":false,"given":"Ahtamjan","family":"Ahmat","sequence":"additional","affiliation":[{"name":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi, China; University of Chinese Academy of Sciences, Beijing, China and Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Erdem B\u0131y\u0131k Kenneth Wang Nima Anari and Dorsa Sadigh. 2019. Batch active learning using determinantal point processes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1906.07975 (2019)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/1390334.1390377"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Kathryn Chaloner and Isabella Verdinelli. 1995. Bayesian experimental design: A review. Statistical science (1995) 273\u2013304.","DOI":"10.1214\/ss\/1177009939"},{"key":"e_1_3_3_1_5_2","unstructured":"Laming Chen Guoxin Zhang and Eric Zhou. 2018. Fast greedy map inference for determinantal point process to improve recommendation diversity. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.735"},{"key":"e_1_3_3_1_8_2","unstructured":"Daniel Golovin and Andreas Krause. 2011. Adaptive submodularity: Theory and applications in active learning and stochastic optimization. Journal of Artificial Intelligence Research 42 (2011) 427\u2013486."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i16.33922"},{"key":"e_1_3_3_1_10_2","first-page":"4904","volume-title":"International conference on machine learning","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904\u20134916."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00740"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-naacl.276"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Byeong\u00a0Su Kim Jieun Kim Deokwoo Lee and Beakcheol Jang. 2025. Visual question answering: A survey of methods datasets evaluation and challenges. Comput. Surveys 57 10 (2025) 1\u201335.","DOI":"10.1145\/3728635"},{"key":"e_1_3_3_1_14_2","unstructured":"Andreas Krause H\u00a0Brendan McMahan Carlos Guestrin and Anupam Gupta. 2008. Robust Submodular Observation Selection.Journal of Machine Learning Research 9 12 (2008)."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Jiayi Kuang Ying Shen Jingyou Xie Haohao Luo Zhe Xu Ronghao Li Yinghui Li Xianfeng Cheng Xika Lin and Yu Han. 2025. Natural language understanding and inference with MLLM in visual question answering: A survey. Comput. Surveys 57 8 (2025) 1\u201336.","DOI":"10.1145\/3711680"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.46"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Matan Levy Rami Ben-Ari Nir Darshan and Dani Lischinski. 2023. Chatting makes perfect: Chat-based image retrieval. Advances in Neural Information Processing Systems 36 (2023) 61437\u201361449.","DOI":"10.52202\/075280-2684"},{"key":"e_1_3_3_1_18_2","first-page":"12888","volume-title":"International conference on machine learning","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International conference on machine learning. PMLR, 12888\u201312900."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02240"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.639"},{"key":"e_1_3_3_1_21_2","unstructured":"Aixin Liu Aoxue Mei Bangcai Lin Bing Xue Bingxuan Wang Bingzheng Xu Bochao Wu Bowei Zhang Chaofan Lin Chen Dong et\u00a0al. 2025. Deepseek-v3. 2: Pushing the frontier of open large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2512.02556 (2025)."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Zejun Liu Fanglin Chen Jun Xu Wenjie Pei and Guangming Lu. 2022. Image-text retrieval with cross-modal semantic importance consistency. IEEE Transactions on Circuits and Systems for Video Technology 33 5 (2022) 2465\u20132476.","DOI":"10.1109\/TCSVT.2022.3220297"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3729950"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.322"},{"key":"e_1_3_3_1_25_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_1_26_2","unstructured":"Joseph\u00a0John Rocchio\u00a0Jr. 1971. Relevance feedback in information retrieval. The SMART retrieval system: experiments in automatic document processing (1971)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Bart Thomee and Michael\u00a0S Lew. 2012. Interactive search in image retrieval: a survey. International Journal of Multimedia Information Retrieval 1 2 (2012) 71\u201386.","DOI":"10.1007\/s13735-012-0014-4"},{"key":"e_1_3_3_1_28_2","unstructured":"Chunyu Xie Bin Wang Fanjing Kong Jincheng Li Dawei Liang Ji Ao Dawei Leng and Yuhui Yin. 2025. FG-CLIP 2: A Bilingual Fine-grained Vision-Language Alignment Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2510.10921 (2025)."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548107"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/1571941.1571954"},{"key":"e_1_3_3_1_31_2","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv et\u00a0al. 2025. Qwen3 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.09388 (2025)."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.398"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835511"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Kun Zhou Fadratul\u00a0Hafinaz Hassan and Gan\u00a0Keng Hoon. 2023. The state of the art for cross-modal retrieval: A survey. IEEE Access 11 (2023) 138568\u2013138589.","DOI":"10.1109\/ACCESS.2023.3338548"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:03:38Z","timestamp":1781535818000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810597"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":33,"alternative-id":["10.1145\/3805622.3810597","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810597","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}