{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T19:44:42Z","timestamp":1754163882393,"version":"3.41.2"},"publisher-location":"New York, NY, USA","reference-count":13,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3733566.3734431","type":"proceedings-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T09:09:47Z","timestamp":1750669787000},"page":"9-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["AMF: Adaptive Modality Fusion for Zero-Shot Composed Image Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1059-6549","authenticated-orcid":false,"given":"Shuping","family":"Hui","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3048-6980","authenticated-orcid":false,"given":"Min","family":"Wang","sequence":"additional","affiliation":[{"name":"Hefei Comprehensive National Science Center, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2353-3508","authenticated-orcid":false,"given":"Hui","family":"Wu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1690-9836","authenticated-orcid":false,"given":"Wengang","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2188-3028","authenticated-orcid":false,"given":"Houqiang","family":"Li","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"isearle: Improving textual inversion for zero-shot composed image retrieval. arXiv:2405.02951","author":"Agnolucci Lorenzo","year":"2024","unstructured":"Lorenzo Agnolucci, Alberto Baldrati, Marco Bertini, and Alberto Del Bimbo. 2024. isearle: Improving textual inversion for zero-shot composed image retrieval. arXiv:2405.02951 (2024)."},{"doi-asserted-by":"crossref","unstructured":"Alberto Baldrati Lorenzo Agnolucci Marco Bertini and Alberto Del Bimbo. 2023. Zero-Shot Composed Image Retrieval with Textual Inversion. In ICCV. 15338--15347.","key":"e_1_3_2_1_2_1","DOI":"10.1109\/ICCV51070.2023.01407"},{"unstructured":"Yiyang Chen Zhedong Zheng Wei Ji Leigang Qu and Tat-Seng Chua. 2024. Composed image retrieval with text feedback via multi-grained uncertainty regularization. In ICLR.","key":"e_1_3_2_1_3_1"},{"key":"e_1_3_2_1_4_1","volume-title":"Gabriela Csurka, and Diane Larlus.","author":"Delmas Ginger","year":"2022","unstructured":"Ginger Delmas, Rafael Sampaio de Rezende, Gabriela Csurka, and Diane Larlus. 2022. Artemis: Attention-based retrieval with text-explicit matching and implicit similarity. In ICLR."},{"unstructured":"Yongchao Du Min Wang Wengang Zhou Shuping Hui and Houqiang Li. 2024. Image2Sentence based Asymmetrical Zero-Shot Composed Image Retrieval. In ICLR.","key":"e_1_3_2_1_5_1"},{"key":"e_1_3_2_1_6_1","volume-title":"Fashionvlp: Vision language transformer for fashion retrieval with feedback. In CVPR. 14105--14115.","author":"Goenka Sonam","year":"2022","unstructured":"Sonam Goenka, Zhaoheng Zheng, Ayush Jaiswal, Rakesh Chada, Yue Wu, Varsha Hedau, and Pradeep Natarajan. 2022. Fashionvlp: Vision language transformer for fashion retrieval with feedback. In CVPR. 14105--14115."},{"doi-asserted-by":"crossref","unstructured":"Geonmo Gu Sanghyuk Chun Wonjae Kim Yoohoon Kang and Sangdoo Yun. 2024. Language-only Efficient Training of Zero-shot Composed Image Retrieval. In CVPR. 13225--13234.","key":"e_1_3_2_1_7_1","DOI":"10.1109\/CVPR52733.2024.01256"},{"key":"e_1_3_2_1_8_1","volume-title":"Fame-vil: Multi-tasking vision-language model for heterogeneous fashion tasks. In CVPR. 2669--2680.","author":"Han Xiao","year":"2023","unstructured":"Xiao Han, Xiatian Zhu, Licheng Yu, Li Zhang, Yi-Zhe Song, and Tao Xiang. 2023. Fame-vil: Multi-tasking vision-language model for heterogeneous fashion tasks. In CVPR. 2669--2680."},{"doi-asserted-by":"crossref","unstructured":"Young Kyun Jang Dat Huynh Ashish Shah Wen-Kai Chen and Ser-Nam Lim. 2025. Spherical Linear Interpolation and Text-Anchoring for Zero-shot Composed Image Retrieval. In ECCV. 239--254.","key":"e_1_3_2_1_9_1","DOI":"10.1007\/978-3-031-72655-2_14"},{"doi-asserted-by":"crossref","unstructured":"Kuniaki Saito Kihyuk Sohn Xiang Zhang Chun-Liang Li Chen-Yu Lee Kate Saenko and Tomas Pfister. 2023. Pic2Word: Mapping Pictures to Words for Zero-shot Composed Image Retrieval. In CVPR. 19305--19314.","key":"e_1_3_2_1_10_1","DOI":"10.1109\/CVPR52729.2023.01850"},{"unstructured":"Yucheng Suo Fan Ma Linchao Zhu and Yi Yang. 2024. Knowledge-Enhanced Dual-stream Zero-shot Composed Image Retrieval. In CVPR. 26951--26962.","key":"e_1_3_2_1_11_1"},{"doi-asserted-by":"crossref","unstructured":"Yuanmin Tang Jing Yu Keke Gai Jiamin Zhuang Gang Xiong Yue Hu and Qi Wu. 2024. Context-I2W: Mapping Images to Context-dependent Words for Accurate Zero-Shot Composed Image Retrieval. In AAAI. 5180--5188.","key":"e_1_3_2_1_12_1","DOI":"10.1609\/aaai.v38i6.28324"},{"doi-asserted-by":"crossref","unstructured":"Nam Vo Lu Jiang Chen Sun Kevin Murphy Li-Jia Li Li Fei-Fei and James Hays. 2019. Composing Text and Image for Image Retrieval-An Empirical Odyssey. In CVPR. 6439--6448.","key":"e_1_3_2_1_13_1","DOI":"10.1109\/CVPR.2019.00660"}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"acronym":"ICMR '25","name":"ICMR '25: International Conference on Multimedia Retrieval","location":"Chicago IL USA"},"container-title":["Proceedings of the 6th Workshop on Intelligent Cross-Data Analysis and Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3733566.3734431","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,31]],"date-time":"2025-07-31T16:19:52Z","timestamp":1753978792000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3733566.3734431"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":13,"alternative-id":["10.1145\/3733566.3734431","10.1145\/3733566"],"URL":"https:\/\/doi.org\/10.1145\/3733566.3734431","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}