{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,5]],"date-time":"2026-04-05T05:28:53Z","timestamp":1775366933462,"version":"3.50.1"},"reference-count":204,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T00:00:00Z","timestamp":1740614400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T00:00:00Z","timestamp":1740614400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 62477006"],"award-info":[{"award-number":["No. 62477006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s10489-025-06372-x","type":"journal-article","created":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T01:28:31Z","timestamp":1740619711000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Composed image retrieval: a survey on recent research and 
development"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6911-0852","authenticated-orcid":false,"given":"Yongquan","family":"Wan","sequence":"first","affiliation":[]},{"given":"Guobing","family":"Zou","sequence":"additional","affiliation":[]},{"given":"Bofeng","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,27]]},"reference":[{"key":"6372_CR1","doi-asserted-by":"crossref","unstructured":"Vo N, Jiang L, Sun C, Murphy K, Li L-J, Fei-Fei L, Hays J (2019) Composing text and image for image retrieval-an empirical odyssey. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6439\u20136448","DOI":"10.1109\/CVPR.2019.00660"},{"key":"6372_CR2","doi-asserted-by":"crossref","unstructured":"Lee S, Kim D, Han B (2021) Cosmo: Content-style modulation for image retrieval with text feedback. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 802\u2013812","DOI":"10.1109\/CVPR46437.2021.00086"},{"key":"6372_CR3","unstructured":"Sadeh G, Fritz L, Shalev G, Oks E (2019) Joint visual-textual embedding for multimodal style search. arXiv:1906.06620"},{"key":"6372_CR4","doi-asserted-by":"crossref","unstructured":"Yu L, Chen J, Sinha A, Wang M, Chen Y, Berg TL, Zhang N (2022) Commercemm: large-scale commerce multimodal representation learning with omni retrieval. In: Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining, pp 4433\u20134442","DOI":"10.1145\/3534678.3539151"},{"issue":"6","key":"6372_CR5","first-page":"1","volume":"20","author":"Y Chen","year":"2024","unstructured":"Chen Y, Zhou J, Peng Y (2024) Spirit: style-guided patch interaction for fashion image retrieval with text feedback. 
ACM Trans Multimed Comput Commun Appl 20(6):1\u201317","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"key":"6372_CR6","doi-asserted-by":"crossref","unstructured":"Lang Y, He Y, Yang F, Dong J, Xue H (2020) Which is plagiarism: fashion image retrieval based on regional representation for design protection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2595\u20132604","DOI":"10.1109\/CVPR42600.2020.00267"},{"key":"6372_CR7","first-page":"4705116","volume":"62","author":"F Wang","year":"2024","unstructured":"Wang F, Zhu X, Liu X, Zhang Y, Li Y (2024) Scene graph-aware hierarchical fusion network for remote sensing image retrieval with text feedback. IEEE Trans Geosci Remote Sens 62:4705116","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"6372_CR8","unstructured":"Liu D, Li H, Zhao Z, Su F, Meng H (2023) Word for person: zero-shot composed person retrieval. arXiv:2311.16515"},{"key":"6372_CR9","doi-asserted-by":"crossref","unstructured":"Zhang D, Wu X-J, Liu Z, Yu J, Kittler J (2020) Fast discrete cross-modal hashing based on label relaxation and matrix factorization. In: Proceedings of the IEEE international conference on pattern recognition, pp 4845\u20134850","DOI":"10.1109\/ICPR48806.2021.9412497"},{"key":"6372_CR10","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1016\/j.patrec.2021.07.018","volume":"151","author":"D Zhang","year":"2021","unstructured":"Zhang D, Wu X-J, Yin H-F, Kittler J (2021) Moon: multi-hash codes joint learning for cross-media retrieval. Pattern Recogn Lett 151:19\u201325","journal-title":"Pattern Recogn Lett"},{"key":"6372_CR11","doi-asserted-by":"crossref","unstructured":"Zhang D, Wu X-J, Yu J (2021) Discrete bidirectional matrix factorization hashing for zero-shot cross-media retrieval. 
In: Proceedings of the 4th chinese conference on pattern recognition and computer vision, pp 524\u2013536","DOI":"10.1007\/978-3-030-88007-1_43"},{"key":"6372_CR12","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1007\/s10044-020-00893-6","volume":"24","author":"D Zhang","year":"2021","unstructured":"Zhang D, Wu X-J, Yu J (2021) Learning latent hash codes with discriminative structure preserving for cross-modal retrieval. Pattern Anal Appl 24:283\u2013297","journal-title":"Pattern Anal Appl"},{"key":"6372_CR13","doi-asserted-by":"crossref","first-page":"2467","DOI":"10.1016\/j.neucom.2017.11.022","volume":"275","author":"M Tzelepi","year":"2018","unstructured":"Tzelepi M, Tefas A (2018) Deep convolutional learning for content based image retrieval. Neurocomputing 275:2467\u20132478","journal-title":"Neurocomputing"},{"issue":"4","key":"6372_CR14","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3451390","volume":"17","author":"N Messina","year":"2021","unstructured":"Messina N, Amato G, Esuli A, Falchi F, Gennaro C, Marchand-Maillet S (2021) Fine-grained visual textual alignment for cross-modal retrieval using transformer encoders. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM) 17(4):1\u201323","journal-title":"ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)"},{"issue":"12","key":"6372_CR15","doi-asserted-by":"crossref","first-page":"17973","DOI":"10.1109\/TNNLS.2023.3310118","volume":"35","author":"S Yan","year":"2023","unstructured":"Yan S, Tang H, Zhang L, Tang J (2023) Image-specific information suppression and implicit local alignment for text-based person search. 
IEEE Trans Neural Netw Learn Syst 35(12):17973\u201317986","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"6372_CR16","doi-asserted-by":"crossref","unstructured":"Yang S, Zhou Y, Zheng Z, Wang Y, Zhu L, Wu Y (2023) Towards unified text-based person retrieval: a large-scale multi-attribute and language search benchmark. In: Proceedings of the 31st ACM international conference on multimedia, pp 4492\u20134501","DOI":"10.1145\/3581783.3611709"},{"issue":"8","key":"6372_CR17","doi-asserted-by":"crossref","first-page":"4257","DOI":"10.1109\/TCSVT.2023.3243725","volume":"33","author":"J Zhang","year":"2023","unstructured":"Zhang J, Xie Y, Ding W, Wang Z (2023) Cross on cross attention: Deep fusion transformer for image captioning. IEEE Trans Circuits Syst Video Technol 33(8):4257\u20134268","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"6372_CR18","doi-asserted-by":"crossref","unstructured":"Hu X, Gan Z, Wang J, Yang Z, Liu Z, Lu Y, Wang L (2022) Scaling up vision-language pre-training for image captioning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 17980\u201317989","DOI":"10.1109\/CVPR52688.2022.01745"},{"issue":"5","key":"6372_CR19","doi-asserted-by":"crossref","first-page":"1733","DOI":"10.1109\/TPAMI.2019.2955476","volume":"43","author":"Z Zhang","year":"2019","unstructured":"Zhang Z, Chen P, Shi X, Yang L (2019) Text-guided neural network training for image recognition in natural scenes and medicine. IEEE Trans Pattern Anal Mach Intell 43(5):1733\u20131745","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6372_CR20","doi-asserted-by":"crossref","first-page":"107563","DOI":"10.1016\/j.patcog.2020.107563","volume":"108","author":"J Yu","year":"2020","unstructured":"Yu J, Zhu Z, Wang Y, Zhang W, Hu Y, Tan J (2020) Cross-modal knowledge reasoning for knowledge-based visual question answering. 
Pattern Recogn 108:107563","journal-title":"Pattern Recogn"},{"key":"6372_CR21","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. In: Proceedings of 3rd international conference on learning representations"},{"key":"6372_CR22","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"6372_CR23","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Proceedings of advances in neural information processing systems, pp 5999\u20136009"},{"key":"6372_CR24","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2019) Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the conference on the north american chapter of the association for computational linguistics: human language technologies, pp 4171\u20134186"},{"key":"6372_CR25","doi-asserted-by":"crossref","unstructured":"Liu Z, Rodriguez-Opazo C, Teney D, Gould S (2021) Image retrieval on real-life images with pre-trained vision-and-language models. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2125\u20132134","DOI":"10.1109\/ICCV48922.2021.00213"},{"issue":"6","key":"6372_CR26","doi-asserted-by":"crossref","first-page":"7270","DOI":"10.1109\/TPAMI.2022.3218591","volume":"45","author":"W Chen","year":"2022","unstructured":"Chen W, Liu Y, Wang W, Bakker EM, Georgiou T, Fieguth P, Liu L, Lew MS (2022) Deep learning for instance retrieval: a survey. 
IEEE Trans Pattern Anal Mach Intell 45(6):7270\u20137292","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6372_CR27","doi-asserted-by":"crossref","first-page":"95410","DOI":"10.1109\/ACCESS.2023.3308911","volume":"11","author":"D Srivastava","year":"2023","unstructured":"Srivastava D, Singh SS, Rajitha B, Verma M, Kaur A, Lee H-N (2023) Content-based image retrieval: a survey on local and global features selection, extraction, epresentation and evaluation parameters. IEEE Access 11:95410\u201395431","journal-title":"IEEE Access"},{"key":"6372_CR28","doi-asserted-by":"crossref","unstructured":"Lande MV, Ridhorkar S (2021) A comprehensive survey on content-based image retrieval using machine learning. In: Proceedings of data analytics and management, pp 165\u2013179","DOI":"10.1007\/978-981-16-6285-0_14"},{"issue":"19","key":"6372_CR29","doi-asserted-by":"crossref","first-page":"29561","DOI":"10.1007\/s11042-021-11045-1","volume":"80","author":"R Kapoor","year":"2021","unstructured":"Kapoor R, Sharma D, Gulati T (2021) State of the art content based image retrieval techniques using deep learning: a survey. Multimed Tool Appl 80(19):29561\u201329583","journal-title":"Multimed Tool Appl"},{"issue":"5","key":"6372_CR30","doi-asserted-by":"crossref","first-page":"2687","DOI":"10.1109\/TCSVT.2021.3080920","volume":"32","author":"SR Dubey","year":"2021","unstructured":"Dubey SR (2021) A decade survey of content based image retrieval using deep learning. IEEE Trans Circuits Syst Video Technol 32(5):2687\u20132704","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"6372_CR31","doi-asserted-by":"crossref","unstructured":"Zhang M, Cai Y, Zhang Y, Li X, Fan Y (2021) A survey on content-based encrypted image retrieval in cloud computing. 
In: Proceedings of the international conference on big data and security, pp 312\u2013325","DOI":"10.1007\/978-981-19-0852-1_24"},{"key":"6372_CR32","doi-asserted-by":"crossref","first-page":"94","DOI":"10.1016\/j.inffus.2020.10.008","volume":"67","author":"Y Li","year":"2021","unstructured":"Li Y, Ma J, Zhang Y (2021) Image retrieval from remote sensing big data: a survey. Inform Fusion 67:94\u2013115","journal-title":"Inform Fusion"},{"key":"6372_CR33","unstructured":"Wang K, Yin Q, Wang W, Wu S, Wang L (2016) A comprehensive survey on cross-modal retrieval. arXiv:1607.06215"},{"key":"6372_CR34","doi-asserted-by":"crossref","unstructured":"Cao M, Li S, Li J, Nie L, Zhang M (2022) Image-text retrieval: A survey on recent research and development. In: Proceedings of the IJCAI international joint conference on artificial intelligence, pp 5410\u20135417","DOI":"10.24963\/ijcai.2022\/759"},{"issue":"7","key":"6372_CR35","doi-asserted-by":"crossref","first-page":"5261","DOI":"10.1007\/s10462-020-09820-x","volume":"53","author":"J Rodrigues","year":"2020","unstructured":"Rodrigues J, Cristo M, Colonna JG (2020) Deep hashing for multi-label image retrieval: A survey. Artif Intell Rev 53(7):5261\u20135307","journal-title":"Artif Intell Rev"},{"issue":"3","key":"6372_CR36","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1007\/s13735-022-00244-7","volume":"11","author":"X Zhou","year":"2022","unstructured":"Zhou X, Han X, Li H, Wang J, Liang X (2022) Cross-domain image retrieval: methods and applications. Int J Multimed Inform Retrieval 11(3):199\u2013218","journal-title":"Int J Multimed Inform Retrieval"},{"key":"6372_CR37","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1007\/s11263-015-0814-0","volume":"115","author":"A Kovashka","year":"2015","unstructured":"Kovashka A, Parikh D, Grauman K (2015) Whittlesearch: interactive image search with relative attribute feedback. 
Int J Comput Vision 115:185\u2013210","journal-title":"Int J Comput Vision"},{"key":"6372_CR38","doi-asserted-by":"crossref","unstructured":"Bhattacharya G, Kilari N, Gubbi J, Pal A et al (2022) Datrnet: disentangling fashion attribute embedding for substitute item retrieval. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2283\u20132287","DOI":"10.1109\/CVPRW56347.2022.00253"},{"key":"6372_CR39","doi-asserted-by":"crossref","unstructured":"Chen Y, Bazzani L (2020) Learning joint visual semantic matching embeddings for language-guided retrieval. In: Proceedings of the 16th European conference on computer vision, pp 136\u2013152","DOI":"10.1007\/978-3-030-58542-6_9"},{"key":"6372_CR40","doi-asserted-by":"crossref","first-page":"103204","DOI":"10.1016\/j.cviu.2021.103204","volume":"207","author":"N Murrugarra-Llerena","year":"2021","unstructured":"Murrugarra-Llerena N, Kovashka A (2021) Image retrieval with mixed initiative and multimodal feedback. Comput Vis Image Underst 207:103204","journal-title":"Comput Vis Image Underst"},{"key":"6372_CR41","doi-asserted-by":"crossref","unstructured":"Changpinyo S, Pont-Tuset J, Ferrari V, Soricut R (2021) Telling the what while pointing to the where: multimodal queries for image retrieval. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12136\u201312146","DOI":"10.1109\/ICCV48922.2021.01192"},{"key":"6372_CR42","doi-asserted-by":"crossref","unstructured":"Changpinyo S, Pont-Tuset J, Ferrari V, Soricut R (2021) Telling the what while pointing to the where: Multimodal queries for image retrieval. 
In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12136\u201312146","DOI":"10.1109\/ICCV48922.2021.01192"},{"issue":"6","key":"6372_CR43","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Sun J (2017) Faster r-cnn: towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6372_CR44","doi-asserted-by":"crossref","unstructured":"Hosseinzadeh M, Wang Y (2020) Composed query image retrieval using locally bounded features. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3596\u20133605","DOI":"10.1109\/CVPR42600.2020.00365"},{"key":"6372_CR45","doi-asserted-by":"crossref","unstructured":"Xu Y, Bin Y, Wang G, Yang Y (2021) Hierarchical composition learning for composed query image retrieval. In: Proceedings of the 3rd ACM international conference on multimedia in Asia, pp 1\u20137","DOI":"10.1145\/3469877.3490601"},{"key":"6372_CR46","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: Proceedings of IEEE conference on computer vision and pattern recognition, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"6372_CR47","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"6372_CR48","doi-asserted-by":"crossref","unstructured":"Fu Z, Chen X, Dong J, Ji S (2021) Multi-order adversarial representation learning for composed query image retrieval. 
In: Proceedings of the IEEE international conference on acoustics, speech and signal processing, pp 1685\u20131689","DOI":"10.1109\/ICASSP39728.2021.9414436"},{"key":"6372_CR49","doi-asserted-by":"crossref","unstructured":"Chen Y, Gong S, Bazzani L (2020) Image search with text feedback by visiolinguistic attention learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3001\u20133011","DOI":"10.1109\/CVPR42600.2020.00307"},{"key":"6372_CR50","unstructured":"Dodds E, Culpepper J, Herdade S, Zhang Y, Boakye K (2020) Modality-agnostic attention fusion for visual search with text feedback. arXiv:2007.00145"},{"issue":"4","key":"6372_CR51","doi-asserted-by":"crossref","first-page":"2959","DOI":"10.1109\/TCSVT.2023.3306738","volume":"34","author":"S Li","year":"2024","unstructured":"Li S, Xu X, Jiang X, Shen F, Liu X, Shen HT (2024) Multi-grained attention network with mutual exclusion for composed query-based image retrieval. IEEE Trans Circuits Syst Video Technol 34(4):2959\u20132972","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"6372_CR52","doi-asserted-by":"crossref","unstructured":"Baldrati A, Bertini M, Uricchio T, Del Bimbo A (2021) Conditioned image retrieval for fashion using contrastive learning and clip-based features. In: Proceedings of the 3rd ACM international conference on multimedia in Asia, pp 1\u20135","DOI":"10.1145\/3469877.3493593"},{"key":"6372_CR53","doi-asserted-by":"crossref","unstructured":"Lin H, Wen H, Chen X, Song X (2023) Clip-based composed image retrieval with comprehensive fusion and data augmentation. In: Proceedings of the Australasian joint conference on artificial intelligence, pp 190\u2013202","DOI":"10.1007\/978-981-99-8388-9_16"},{"key":"6372_CR54","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, garwal, S, Sastry, G, Askell, A, Mishkin, P, Clark, J, et al (2021) Learning transferable visual models from natural language supervision. 
In: Proceedings of the international conference on machine learning, pp 8748\u20138763"},{"key":"6372_CR55","doi-asserted-by":"crossref","unstructured":"Gao D, Jin L, Chen B, Qiu M, Li P, Wei Y, Hu Y, Wang H (2020) Fashionbert: Text and image matching with adaptive loss for cross-modal retrieval. In: Proceedings of the 43rd International ACM SIGIR conference on research and development in information retrieval, pp 2251\u20132260","DOI":"10.1145\/3397271.3401430"},{"key":"6372_CR56","doi-asserted-by":"crossref","unstructured":"Zhuge M, Gao D, Fan D-P, Jin L, Chen B, Zhou H, Qiu M, Shao L (2021) Kaleido-bert: Vision-language pre-training on fashion domain. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12647\u201312657","DOI":"10.1109\/CVPR46437.2021.01246"},{"key":"6372_CR57","unstructured":"Zhang W, Guo J, Li M, Shi H, Zhang S, Li J, Tang S, Zhuang Y (2022) Boss: Bottom-up cross-modal semantic composition with hybrid counterfactual training for robust content-based image retrieval. arXiv:2207.04211"},{"key":"6372_CR58","doi-asserted-by":"crossref","unstructured":"Goenka S, Zheng Z, Jaiswal A, Chada R, Wu Y, Hedau V, Natarajan P (2022) Fashionvlp: Vision language transformer for fashion retrieval with feedback. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14105\u201314115","DOI":"10.1109\/CVPR52688.2022.01371"},{"key":"6372_CR59","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna R, Zhu Y, Groth O, Johnson J, Hata K, Kravitz J, Chen S, Kalantidis Y, Li L-J, Shamma DA et al (2017) Visual genome: connecting language and vision using crowdsourced dense image annotations. 
Int J Comput Vision 123:32\u201373","journal-title":"Int J Comput Vision"},{"key":"6372_CR60","doi-asserted-by":"crossref","first-page":"84613","DOI":"10.1109\/ACCESS.2019.2923552","volume":"7","author":"I Tautkute","year":"2019","unstructured":"Tautkute I, Trzci\u0144ski T, Skorupa AP, Brocki L, Marasek K (2019) Deepstyle: multimodal search engine for fashion and interior design. IEEE Access 7:84613\u201384628","journal-title":"IEEE Access"},{"key":"6372_CR61","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"6372_CR62","doi-asserted-by":"crossref","unstructured":"Cui C, Wang W, Song X, Huang M, Xu X-S, Nie L (2019) User attention-guided multimodal dialog systems. In: Proceedings of the 42nd International ACM SIGIR conference on research and development in information retrieval, pp 445\u2013454","DOI":"10.1145\/3331184.3331226"},{"key":"6372_CR63","unstructured":"Xu X, Liu Y, Khan S, Khan F, Zuo W, Goh RSM, Feng C-M et al (2024) Sentence-level prompts benefit composed image retrieval. In: Proceedings of the 12th international conference on learning representations. https:\/\/openreview.net\/forum?id=m3ch3kJL7q"},{"key":"6372_CR64","unstructured":"Gu G, Chun S, Kim W, Jun H, Kang Y, Yun S (2023) Compodiff: Versatile composed image retrieval with latent diffusion. arXiv:2303.11916"},{"key":"6372_CR65","doi-asserted-by":"publisher","unstructured":"Ge H, Jiang Y, Sun J, Yuan K, Liu Y (2024) Llm-enhanced composed image retrieval: An intent uncertainty-aware linguistic-visual dual channel matching model. ACM Trans Inform Syst. 
https:\/\/doi.org\/10.1145\/3699715","DOI":"10.1145\/3699715"},{"key":"6372_CR66","doi-asserted-by":"crossref","unstructured":"Jang YK, Kim D, Meng Z, Huynh D, Lim S-N (2024) Visual delta generator with large multi-modal models for semi-supervised composed image retrieval. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16805\u201316814","DOI":"10.1109\/CVPR52733.2024.01590"},{"key":"6372_CR67","unstructured":"Mikolov T (2013) Efficient estimation of word representations in vector space. arXiv:1301.3781"},{"key":"6372_CR68","doi-asserted-by":"crossref","unstructured":"Yuan Y, Lam W (2021) Conversational fashion image retrieval via multiturn natural language feedback. In: Proceedings of the 44th International ACM SIGIR conference on research and development in information retrieval, pp 839\u2013848","DOI":"10.1145\/3404835.3462881"},{"key":"6372_CR69","doi-asserted-by":"crossref","unstructured":"Kim J, Yu Y, Kim H, Kim G (2021) Dual compositional learning in interactive image retrieval. In: Proceedings of the 35th AAAI conference on artificial intelligence, pp 1771\u20131779","DOI":"10.1609\/aaai.v35i2.16271"},{"key":"6372_CR70","doi-asserted-by":"crossref","unstructured":"Naka R, Katsurai M, Yanagi K, Goto R (2022) Fashion style-aware embeddings for clothing image retrieval. In: Proceedings of the the international conference on multimedia retrieval, pp 49\u201353","DOI":"10.1145\/3512527.3531433"},{"key":"6372_CR71","doi-asserted-by":"crossref","first-page":"5976","DOI":"10.1109\/TIP.2022.3204213","volume":"31","author":"G Zhang","year":"2022","unstructured":"Zhang G, Wei S, Pang H, Qiu S, Zhao Y (2022) Composed image retrieval via explicit erasure and replenishment with semantic alignment. 
IEEE Trans Image Process 31:5976\u20135988","journal-title":"IEEE Trans Image Process"},{"issue":"6","key":"6372_CR72","first-page":"1","volume":"19","author":"H Zhu","year":"2023","unstructured":"Zhu H, Wei Y, Zhao Y, Zhang C, Huang S (2023) Amc: adaptive multi-expert collaborative network for text-guided image retrieval. ACM Trans Multimed Comput Commun Appl 19(6):1\u201322","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"issue":"8","key":"6372_CR73","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"6372_CR74","doi-asserted-by":"crossref","unstructured":"Huang J, Feris RS, Chen Q, Yan S (2015) Cross-domain image retrieval with a dual attribute-aware ranking network. In: Proceedings of the IEEE international conference on computer vision, pp 1062\u20131070","DOI":"10.1109\/ICCV.2015.127"},{"issue":"2","key":"6372_CR75","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3478642","volume":"18","author":"F Zhang","year":"2022","unstructured":"Zhang F, Xu M, Xu C (2022) Tell, imagine, and search: end-to-end learning for composing text and image to image retrieval. ACM Trans Multimed Comput Commun Appl 18(2):1\u201323","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"key":"6372_CR76","doi-asserted-by":"crossref","unstructured":"Zhang F, Xu M, Mao Q, Xu C (2020) Joint attribute manipulation and modality alignment learning for composing text and image to image retrieval. In: Proceedings of the 28th ACM international conference on multimedia, pp 3367\u20133376","DOI":"10.1145\/3394171.3413917"},{"key":"6372_CR77","unstructured":"Chung J (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. 
arXiv:1412.3555"},{"key":"6372_CR78","doi-asserted-by":"crossref","unstructured":"Liu Y, Lu Y (2021) Multi-grained fusion for conditional image retrieval. In: Proceedings of the 27th international conference on multimedia modeling, pp 315\u2013327","DOI":"10.1007\/978-3-030-67832-6_26"},{"key":"6372_CR79","unstructured":"Wu Y (2016) Google\u2019s neural machine translation system: Bridging the gap between human and machine translation. arXiv:1609.08144"},{"key":"6372_CR80","unstructured":"G\u00fcnel M, Erdem E, Erdem A (2018) Language guided fashion image manipulation with feature-wise transformations. arXiv:1808.04000"},{"key":"6372_CR81","unstructured":"Zhao S, Xu H (2024) Neucore: Neural concept reasoning for composed image retrieval. In: Proceedings of the 1st workshop on unifying representations in neural models, pp 47\u201359"},{"key":"6372_CR82","doi-asserted-by":"crossref","unstructured":"Zhang G, Wei S, Pang H, Zhao Y (2021) Heterogeneous feature fusion and cross-modal alignment for composed image retrieval. In: Proceedings of the 29th ACM international conference on multimedia, pp 5353\u20135362","DOI":"10.1145\/3474085.3475659"},{"key":"6372_CR83","doi-asserted-by":"crossref","unstructured":"Laenen K, Zoghbi S, Moens M-F (2018) Web search of fashion items with multimodal querying. In: Proceedings of the 11th ACM international conference on web search and data mining, pp 342\u2013350","DOI":"10.1145\/3159652.3159716"},{"key":"6372_CR84","doi-asserted-by":"crossref","unstructured":"Yang Y, Wang M, Zhou W, Li H (2021) Cross-modal joint prediction and alignment for composed query image retrieval. 
In: Proceedings of the 29th ACM international conference on multimedia, pp 3303\u20133311","DOI":"10.1145\/3474085.3475483"},{"key":"6372_CR85","doi-asserted-by":"crossref","first-page":"4543","DOI":"10.1109\/TIP.2023.3299791","volume":"32","author":"Q Yang","year":"2023","unstructured":"Yang Q, Ye M, Cai Z, Su K, Du B (2023) Composed image retrieval via cross relation network with hierarchical aggregation transformer. IEEE Trans Image Process 32:4543\u20134554","journal-title":"IEEE Trans Image Process"},{"key":"6372_CR86","doi-asserted-by":"crossref","unstructured":"Hu Z, Zhu X, Tran S, Vidal R, Dhua A (2023) Provla: compositional image search with progressive vision-language alignment and multimodal fusion. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2772\u20132777","DOI":"10.1109\/ICCVW60793.2023.00293"},{"key":"6372_CR87","unstructured":"Jiang X, Wang Y, Wu Y, Wang M, Qian X (2023) Dual relation alignment for composed image retrieval. arXiv:2309.02169"},{"key":"6372_CR88","doi-asserted-by":"crossref","first-page":"166","DOI":"10.1016\/j.neucom.2022.01.078","volume":"496","author":"C Gu","year":"2022","unstructured":"Gu C, Bu J, Zhou X, Yao C, Ma D, Yu Z, Yan X (2022) Cross-modal image retrieval with deep mutual information maximization. Neurocomputing 496:166\u2013177","journal-title":"Neurocomputing"},{"key":"6372_CR89","unstructured":"Hjelm RD, Fedorov A, Lavoie-Marchildon S, Grewal K, Bachman P, Trischler A, Bengio Y (2018) Learning deep representations by mutual information estimation and maximization. arXiv:1808.06670"},{"key":"6372_CR90","doi-asserted-by":"crossref","first-page":"8346","DOI":"10.1109\/TMM.2023.3235495","volume":"25","author":"Y Xu","year":"2023","unstructured":"Xu Y, Bin Y, Wei J, Yang Y, Wang G, Shen HT (2023) Multi-modal transformer with global-local alignment for composed query image retrieval. 
IEEE Trans Multimedia 25:8346\u20138357","journal-title":"IEEE Trans Multimedia"},{"key":"6372_CR91","doi-asserted-by":"crossref","unstructured":"Wang C, Nezhadarya E, Sadhu T, Zhang S (2022) Exploring compositional image retrieval with hybrid compositional learning and heuristic negative mining. In: Findings of the Association for Computational Linguistics: EMNLP, pp 1273\u20131285","DOI":"10.18653\/v1\/2022.findings-emnlp.92"},{"key":"6372_CR92","unstructured":"Yu Y, Lee S, Choi Y, Kim G (2020) Curlingnet: Compositional learning between images and text for fashion iq data. arXiv:2003.12299"},{"issue":"6","key":"6372_CR93","first-page":"1","volume":"20","author":"S Li","year":"2024","unstructured":"Li S, Xu X, Jiang X, Shen F, Sun Z, Cichocki A (2024) Cross-modal attention preservation with self-contrastive learning for composed query-based image retrieval. ACM Trans Multimed Comput Commun Appl 20(6):1\u201322","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"key":"6372_CR94","doi-asserted-by":"crossref","unstructured":"Sarker MI, Milanova M (2022) Deep learning-based multimodal image retrieval combining image and text. In: Proceedings of the international conference on computational science and computational intelligence, pp 1543\u20131546","DOI":"10.1109\/CSCI58124.2022.00274"},{"key":"6372_CR95","unstructured":"Yang X, Liu D, Zhang H, Luo Y, Wang C, Zhang J (2023) Decompose semantic shifts for composed image retrieval. arXiv:2309.09531"},{"key":"6372_CR96","doi-asserted-by":"crossref","unstructured":"Anwaar MU, Labintcev E, Kleinsteuber M (2021) Compositional learning of image-text query for image retrieval. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 1140\u20131149","DOI":"10.1109\/WACV48630.2021.00118"},{"key":"6372_CR97","doi-asserted-by":"crossref","unstructured":"Hou Y, Vig E, Donoser M, Bazzani L (2021) Learning attribute-driven disentangled representations for interactive fashion retrieval. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12147\u201312157","DOI":"10.1109\/ICCV48922.2021.01193"},{"key":"6372_CR98","doi-asserted-by":"crossref","unstructured":"Jandial S, Badjatiya P, Chawla P, Chopra A, Sarkar M, Krishnamurthy B (2022) Sac: Semantic attention composition for text-conditioned image retrieval. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 4021\u20134030","DOI":"10.1109\/WACV51458.2022.00067"},{"key":"6372_CR99","doi-asserted-by":"crossref","unstructured":"Gu C, Bu J, Zhang Z, Yu Z, Ma D, Wang W (2021) Image search with text feedback by deep hierarchical attention mutual information maximization. In: Proceedings of the 29th ACM international conference on multimedia, pp 4600\u20134609","DOI":"10.1145\/3474085.3475619"},{"key":"6372_CR100","unstructured":"Zhang X, Zheng Z, Wang X, Yang Y (2023) Relieving triplet ambiguity: consensus network for language-guided image retrieval. arXiv:2306.02092"},{"key":"6372_CR101","doi-asserted-by":"crossref","unstructured":"Mao X, Chen Y, Li Y, Xiong T, He Y, Xue H (2019) Bilinear representation for language-based image editing using conditional generative adversarial networks. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 2047\u20132051","DOI":"10.1109\/ICASSP.2019.8683008"},{"key":"6372_CR102","doi-asserted-by":"crossref","unstructured":"Li S (2023) Dual-path semantic construction network for composed query-based image retrieval. In: Proceedings of the 2023 ACM international conference on multimedia retrieval, pp 636\u2013639","DOI":"10.1145\/3591106.3592245"},{"key":"6372_CR103","unstructured":"Yu Y, Lee S, Choi Y, Kim G (2020) Curlingnet: Compositional learning between images and text for fashion iq data. 
arXiv:2003.12299"},{"key":"6372_CR104","unstructured":"Chen Z, Deng Y, Wu Y, Gu Q, Li Y (2022) Towards understanding the mixture-of-experts layer in deep learning. In: Proceedings of advances in neural information processing systems, vol 35, pp 23049\u201323062"},{"key":"6372_CR105","doi-asserted-by":"crossref","unstructured":"Wu Y, Li H, Wang F, Zhang Y, Liang R (2024) Self-distilled dynamic fusion network for language-based fashion retrieval. In: Proceedings of the IEEE international conference on acoustics, speech and signal processing, pp 3260\u20133264","DOI":"10.1109\/ICASSP48485.2024.10445903"},{"key":"6372_CR106","doi-asserted-by":"crossref","unstructured":"Huang F, Zhang L, Fu X, Song S (2024) Dynamic weighted combiner for mixed-modal image retrieval. In: Proceedings of the AAAI conference on artificial intelligence, vol 38, pp 2303\u20132311","DOI":"10.1609\/aaai.v38i3.28004"},{"key":"6372_CR107","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. arXiv:1503.02531"},{"key":"6372_CR108","doi-asserted-by":"crossref","unstructured":"Wen H, Zhang X, Song X, Wei Y, Nie L (2023) Target-guided composed image retrieval. In: Proceedings of the 31st ACM international conference on multimedia, pp 915\u2013923","DOI":"10.1145\/3581783.3611817"},{"key":"6372_CR109","doi-asserted-by":"crossref","first-page":"6446","DOI":"10.1109\/TMM.2022.3208742","volume":"25","author":"H Pang","year":"2022","unstructured":"Pang H, Wei S, Zhang G, Zhang S, Qiu S, Zhao Y (2022) Heterogeneous feature alignment and fusion in cross-modal augmented space for composed image retrieval. IEEE Trans Multimedia 25:6446\u20136457","journal-title":"IEEE Trans Multimedia"},{"key":"6372_CR110","doi-asserted-by":"crossref","unstructured":"Huang F, Zhang L (2023) Language guided local infiltration for interactive image retrieval. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6104\u20136113","DOI":"10.1109\/CVPRW59228.2023.00650"},{"key":"6372_CR111","doi-asserted-by":"crossref","unstructured":"Zhuge M, Gao D, Fan D, Jin L, Chen B, Zhou H, Qiu M, Shao L (2021) Kaleido-bert: Vision-language pre-training on fashion domain. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12647\u201312657","DOI":"10.1109\/CVPR46437.2021.01246"},{"key":"6372_CR112","unstructured":"Udhayanan P, Karanam S, Srinivasan BV (2023) Learning with multi-modal gradient attention for explainable composed image retrieval. arXiv:2308.16649"},{"key":"6372_CR113","doi-asserted-by":"crossref","first-page":"916","DOI":"10.1109\/TMM.2023.3273466","volume":"26","author":"G Zhang","year":"2023","unstructured":"Zhang G, Wei S, Pang H, Qiu S, Zhao Y (2023) Enhance composed image retrieval via multi-level collaborative localization and semantic activeness perception. IEEE Trans Multimedia 26:916\u2013928","journal-title":"IEEE Trans Multimedia"},{"key":"6372_CR114","doi-asserted-by":"crossref","unstructured":"Wen H, Song X, Yang X, Zhan Y, Nie L (2021) Comprehensive linguistic-visual composition network for image retrieval. In: Proceedings of the 44th International ACM SIGIR conference on research and development in information retrieval, pp 1369\u20131378","DOI":"10.1145\/3404835.3462967"},{"issue":"10","key":"6372_CR115","doi-asserted-by":"crossref","first-page":"14247","DOI":"10.1109\/TNNLS.2023.3276796","volume":"35","author":"Y Liu","year":"2023","unstructured":"Liu Y, Liu H, Wang H, Meng F, Liu M (2023) Bcan: bidirectional correct attention network for cross-modal retrieval. 
IEEE Trans Neural Netw Learn Syst 35(10):14247\u201314258","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"1","key":"6372_CR116","doi-asserted-by":"crossref","first-page":"586","DOI":"10.1007\/s10489-022-03559-4","volume":"53","author":"Z Guo","year":"2023","unstructured":"Guo Z, Han D (2023) Sparse co-attention visual question answering networks based on thresholds. Appl Intell 53(1):586\u2013600","journal-title":"Appl Intell"},{"key":"6372_CR117","doi-asserted-by":"crossref","unstructured":"Tian Y, Newsam S, Boakye K (2023) Fashion image retrieval with text feedback by additive attention compositional learning. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 1011\u20131021","DOI":"10.1109\/WACV56688.2023.00107"},{"issue":"5","key":"6372_CR118","doi-asserted-by":"crossref","first-page":"3665","DOI":"10.1109\/TPAMI.2023.3346434","volume":"46","author":"H Wen","year":"2023","unstructured":"Wen H, Song X, Yin J, Wu J, Guan W, Nie L (2023) Self-training boosted multi-factor matching network for composed image retrieval. IEEE Trans Pattern Anal Mach Intell 46(5):3665\u20133678","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6372_CR119","doi-asserted-by":"crossref","first-page":"1000","DOI":"10.1109\/TIP.2021.3138302","volume":"31","author":"F Zhang","year":"2021","unstructured":"Zhang F, Xu M, Xu C (2021) Geometry sensitive cross-modal reasoning for composed query based image retrieval. IEEE Trans Image Process 31:1000\u20131011","journal-title":"IEEE Trans Image Process"},{"key":"6372_CR120","doi-asserted-by":"crossref","unstructured":"Han X, Zhu X, Yu L, Zhang L, Song Y-Z, Xiang T (2023) Fame-vil: Multi-tasking vision-language model for heterogeneous fashion tasks. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2669\u20132680","DOI":"10.1109\/CVPR52729.2023.00262"},{"key":"6372_CR121","doi-asserted-by":"crossref","unstructured":"Mirchandani S, Yu L, Wang M, Sinha A, Jiang W, Xiang T, Zhang N (2022) Fad-vlp: Fashion vision-and-language pre-training towards unified retrieval and captioning. In: Proceedings of the conference on empirical methods in natural language processing, pp 10484\u201310497","DOI":"10.18653\/v1\/2022.emnlp-main.716"},{"key":"6372_CR122","unstructured":"Liu Z, Sun W, Teney D, Gould S (2023) Candidate set re-ranking for composed image retrieval with dual multi-modal encoder. arXiv:2305.16304"},{"key":"6372_CR123","unstructured":"Kipf TN, Welling M (2016) Semi-supervised classification with graph convolutional networks. arXiv:1609.02907"},{"key":"6372_CR124","unstructured":"Veli\u010dkovi\u0107 P, Cucurull G, Casanova A, Romero A, Lio P, Bengio Y (2017) Graph attention networks. arXiv:1710.10903"},{"key":"6372_CR125","unstructured":"Shin M, Cho Y, Ko B, Gu G (2021) Rtic: Residual learning for text and image composition using graph convolutional network. arXiv:2104.03015"},{"key":"6372_CR126","doi-asserted-by":"crossref","first-page":"216","DOI":"10.1016\/j.cag.2023.07.025","volume":"115","author":"H Ding","year":"2023","unstructured":"Ding H, Wang S, Xie Z, Li M, Ma L (2023) A fine-grained vision and language representation framework with graph-based fashion semantic knowledge. Comput Graph 115:216\u2013225","journal-title":"Comput Graph"},{"key":"6372_CR127","doi-asserted-by":"crossref","first-page":"7732","DOI":"10.1109\/TIP.2021.3108724","volume":"30","author":"L Nie","year":"2021","unstructured":"Nie L, Jiao F, Wang W, Wang Y, Tian Q (2021) Conversational image search. 
IEEE Trans Image Process 30:7732\u20137743","journal-title":"IEEE Trans Image Process"},{"key":"6372_CR128","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Proceedings of advances in neural information processing systems, vol 27, pp 2672\u20132680"},{"key":"6372_CR129","unstructured":"Guo X, Wu H, Cheng Y, Rennie S, Tesauro G, Feris R (2018) Dialog-based interactive image retrieval. In: Proceedings of advances in neural information processing systems, vol 31, pp 678\u2013688"},{"key":"6372_CR130","doi-asserted-by":"crossref","unstructured":"Ak KE, Lim JH, Tham JY, Kassim AA (2019) Attribute manipulation generative adversarial networks for fashion images. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10541\u201310550","DOI":"10.1109\/ICCV.2019.01064"},{"key":"6372_CR131","doi-asserted-by":"crossref","unstructured":"Yang X, Song X, Han X, Wen H, Nie J, Nie L (2020) Generative attribute manipulation scheme for flexible fashion search. In: Proceedings of the 43rd international acm sigir conference on research and development in information retrieval, pp 941\u2013950","DOI":"10.1145\/3397271.3401150"},{"key":"6372_CR132","unstructured":"Shin M, Park S, Kim T (2019) Semi-supervised feature-level attribute manipulation for fashion image retrieval. arXiv:1907.05007"},{"key":"6372_CR133","unstructured":"Tautkute I, Trzcinski T (2021) I want this product but different: Multimodal retrieval with synthetic query expansion. arXiv:2102.08871"},{"key":"6372_CR134","doi-asserted-by":"crossref","unstructured":"Kenan EA, Sun Y, Lim JH (2020) Learning cross-modal representations for language-based image manipulation. 
In: Proceedings of the IEEE international conference on image processing, pp 1601\u20131605","DOI":"10.1109\/ICIP40778.2020.9191228"},{"key":"6372_CR135","doi-asserted-by":"crossref","unstructured":"Kong C, Jeon D, Kwon O, Kwak N (2023) Leveraging off-the-shelf diffusion model for multi-attribute fashion image manipulation. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 848\u2013857","DOI":"10.1109\/WACV56688.2023.00091"},{"issue":"12","key":"6372_CR136","doi-asserted-by":"crossref","first-page":"4217","DOI":"10.1109\/TPAMI.2020.2970919","volume":"43","author":"T Karras","year":"2021","unstructured":"Karras T, Laine S, Aila T (2021) A style-based generator architecture for generative adversarial networks. IEEE Trans Pattern Anal Mach Intell 43(12):4217\u20134228","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6372_CR137","doi-asserted-by":"crossref","unstructured":"Zhu J-Y, Park T, Isola P, Efros AA (2017) Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE international conference on computer vision, pp 2242\u20132251","DOI":"10.1109\/ICCV.2017.244"},{"key":"6372_CR138","first-page":"9694","volume":"34","author":"J Li","year":"2021","unstructured":"Li J, Selvaraju R, Gotmare A, Joty S, Xiong C, Hoi SCH (2021) Align before fuse: vision and language representation learning with momentum distillation. Proc Advan Neural Inform Process Syst 34:9694\u20139705","journal-title":"Proc Advan Neural Inform Process Syst"},{"key":"6372_CR139","doi-asserted-by":"crossref","unstructured":"Zhou L, Palangi H, Zhang L, Hu H, Corso J, Gao J (2020) Unified vision-language pre-training for image captioning and vqa. 
In: Proceedings of the AAAI conference on artificial intelligence, vol 34, pp 13041\u201313049","DOI":"10.1609\/aaai.v34i07.7005"},{"key":"6372_CR140","doi-asserted-by":"crossref","unstructured":"Hong W, Ji K, Liu J, Wang J, Chen J, Chu W (2021) Gilbert: generative vision-language pre-training for image-text retrieval. In: Proceedings of the 44th international ACM SIGIR conference on research and development in information retrieval, pp 1379\u20131388","DOI":"10.1145\/3404835.3462838"},{"key":"6372_CR141","doi-asserted-by":"crossref","unstructured":"Liu Z, Ma Y, Schubert M, Ouyang Y, Xiong Z (2022) Multi-modal contrastive pre-training for recommendation. In: Proceedings of the 2022 international conference on multimedia retrieval, pp 99\u2013108","DOI":"10.1145\/3512527.3531378"},{"key":"6372_CR142","unstructured":"Li LH, Yatskar M, Yin D, Hsieh C-J, Chang K-W (2019) Visualbert: a simple and performant baseline for vision and language. arXiv:1908.03557"},{"key":"6372_CR143","doi-asserted-by":"crossref","unstructured":"Chen Y-C, Li L, Yu L, El Kholy A, Ahmed F, Gan Z, Cheng Y, Liu J (2020) Uniter: universal image-text representation learning. In: Proceedings of the european conference on computer vision, vol 12375 LNCS, pp 104\u2013120","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"6372_CR144","unstructured":"Kim W, Son B, Kim I (2021) Vilt: vision-and-language transformer without convolution or region supervision. In: Proceedings of the international conference on machine learning, vol 139, pp 5583\u20135594"},{"key":"6372_CR145","unstructured":"Su W, Zhu X, Cao Y, Li B, Lu L, Wei F, Dai J (2019) Vl-bert: pre-training of generic visual-linguistic representations. arXiv:1908.08530"},{"key":"6372_CR146","doi-asserted-by":"crossref","unstructured":"Li X, Yin X, Li C, Zhang P, Hu X, Zhang L, Wang L, Hu H, Dong L, Wei F et al (2020) Oscar: Object-semantics aligned pre-training for vision-language tasks. 
In: Proceedings of the 16th european conference on computer vision, vol 12375 LNCS, pp 121\u2013137","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"6372_CR147","doi-asserted-by":"crossref","unstructured":"Han Y, Zhang L, Chen Q, Chen Z, Li Z, Yang J, Cao Z (2023) Fashionsap: Symbols and attributes prompt for fine-grained fashion vision-language pre-training. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, vol 2023-June, pp 15028\u201315038","DOI":"10.1109\/CVPR52729.2023.01443"},{"key":"6372_CR148","doi-asserted-by":"crossref","unstructured":"Han X, Yu L, Zhu X, Zhang L, Song Y-Z, Xiang T (2022) Fashionvil: fashion-focused vision-and-language representation learning. In: Proceedings of the 17th european conference on computer vision, pp 634\u2013651","DOI":"10.1007\/978-3-031-19833-5_37"},{"key":"6372_CR149","doi-asserted-by":"crossref","unstructured":"Sun S, Chen Y-C, Li L, Wang S, Fang Y, Liu J (2021) Lightningdot: pre-training visual-semantic embeddings for real-time image-text retrieval. In: Proceedings of the 2021 conference of the north american chapter of the association for computational linguistics: human language technologies, pp 982\u2013997","DOI":"10.18653\/v1\/2021.naacl-main.77"},{"key":"6372_CR150","doi-asserted-by":"crossref","unstructured":"Tan H, Bansal M (2019) Lxmert: learning cross-modality encoder representations from transformers. arXiv:1908.07490","DOI":"10.18653\/v1\/D19-1514"},{"key":"6372_CR151","unstructured":"Lu J, Batra D, Parikh D, Lee S (2019) Vilbert: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Proceedings of the advances in neural information processing systems, vol 32"},{"key":"6372_CR152","unstructured":"Jia C, Yang Y, Xia Y, Chen Y-T, Parekh Z, Pham H, Le Q, Sung Y-H, Li Z, Duerig T (2021) Scaling up visual and vision-language representation learning with noisy text supervision. 
In: Proceedings of the international conference on machine learning, pp 4904\u20134916"},{"key":"6372_CR153","doi-asserted-by":"crossref","unstructured":"Zhao Y, Song Y, Jin Q (2022) Progressive learning for image retrieval with hybrid-modality queries. In: Proceedings of the 45th international ACM SIGIR conference on research and development in information retrieval, pp 1012\u20131021","DOI":"10.1145\/3477495.3532047"},{"issue":"3","key":"6372_CR154","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3617597","volume":"20","author":"A Baldrati","year":"2023","unstructured":"Baldrati A, Bertini M, Uricchio T, Del Bimbo A (2023) Composed image retrieval using contrastive learning and task-oriented clip-based features. ACM Trans Multimed Comput Commun Appl 20(3):1\u201324","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"key":"6372_CR155","doi-asserted-by":"crossref","unstructured":"Baldrati A, Bertini M, Uricchio T, Del Bimbo A (2022) Conditioned and composed image retrieval combining and partially fine-tuning clip-based features. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4959\u20134968","DOI":"10.1109\/CVPRW56347.2022.00543"},{"key":"6372_CR156","doi-asserted-by":"crossref","unstructured":"Liu Z, Sun W, Hong Y, Teney D, Gould S (2024) Bi-directional training for composed image retrieval via text prompt learning. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 5753\u20135762","DOI":"10.1109\/WACV57701.2024.00565"},{"key":"6372_CR157","doi-asserted-by":"crossref","unstructured":"Zhao B, Feng J, Wu X, Yan S (2017) Memory-augmented attribute manipulation networks for interactive fashion search. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1520\u20131528","DOI":"10.1109\/CVPR.2017.652"},{"key":"6372_CR158","doi-asserted-by":"crossref","unstructured":"Zhang F, Yan M, Zhang J, Xu C (2022) Comprehensive relationship reasoning for composed query based image retrieval. In: Proceedings of the 30th ACM international conference on multimedia, pp 4655\u20134664","DOI":"10.1145\/3503161.3548126"},{"key":"6372_CR159","doi-asserted-by":"crossref","first-page":"1149","DOI":"10.1109\/TIP.2024.3359062","volume":"33","author":"G Zhang","year":"2024","unstructured":"Zhang G, Li S, Wei S, Ge S, Cai N, Zhao Y (2024) Multimodal composition example mining for composed query image retrieval. IEEE Trans Image Process 33:1149\u20131161","journal-title":"IEEE Trans Image Process"},{"key":"6372_CR160","unstructured":"Howard AG (2017) Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv:1704.04861"},{"key":"6372_CR161","doi-asserted-by":"crossref","unstructured":"Wu H, Gao Y, Guo X, Al-Halah Z, Rennie S, Grauman K, Feris R (2021) Fashion iq: a new dataset towards retrieving images by natural language feedback. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11307\u201311317","DOI":"10.1109\/CVPR46437.2021.01115"},{"key":"6372_CR162","doi-asserted-by":"crossref","unstructured":"Han X, Wu Z, Huang P, Zhang X, Zhu M, Li Y, Zhao Y, Davis LS (2017) Automatic spatially-aware fashion concept discovery. In: Proceedings of the IEEE international conference on computer vision, pp 1463\u20131471","DOI":"10.1109\/ICCV.2017.163"},{"key":"6372_CR163","doi-asserted-by":"crossref","first-page":"7415","DOI":"10.1109\/TMM.2022.3222624","volume":"25","author":"F Huang","year":"2022","unstructured":"Huang F, Zhang L, Zhou Y, Gao X (2022) Adversarial and isotropic gradient augmentation for image retrieval with text feedback. 
IEEE Trans Multimedia 25:7415\u20137427","journal-title":"IEEE Trans Multimedia"},{"key":"6372_CR164","doi-asserted-by":"crossref","unstructured":"Neculai A, Chen Y, Akata Z (2022) Probabilistic compositional embeddings for multimodal image retrieval. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4547\u20134557","DOI":"10.1109\/CVPRW56347.2022.00501"},{"key":"6372_CR165","doi-asserted-by":"crossref","unstructured":"Tian Y, Newsam S, Boakye K (2023) Fashion image retrieval with text feedback by additive attention compositional learning. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 1011\u20131021","DOI":"10.1109\/WACV56688.2023.00107"},{"key":"6372_CR166","unstructured":"Chen Y, Zheng Z, Ji W, Qu L, Chua T-S (2024) Composed image retrieval with text feedback via multi-grained uncertainty regularization. In: Proceedings of the international conference on learning representations. https:\/\/openreview.net\/forum?id=Yb5KvPkKQg"},{"key":"6372_CR167","doi-asserted-by":"crossref","unstructured":"Xu Y, Bin Y, Wei J, Yang Y, Wang G, Shen HT (2024) Align and retrieve: Composition and decomposition learning in image retrieval with text feedback. IEEE Trans Multimed:9936\u20139948","DOI":"10.1109\/TMM.2024.3417694"},{"key":"6372_CR168","doi-asserted-by":"crossref","unstructured":"Baldrati A, Bertini M, Uricchio T, Del Bimbo A (2022) Effective conditioned and composed image retrieval combining clip-based features. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 21466\u201321474","DOI":"10.1109\/CVPR52688.2022.02080"},{"key":"6372_CR169","unstructured":"Delmas G, Rezende RS, Csurka G, Larlus D (2022) Artemis: Attention-based retrieval with text-explicit matching and implicit similarity. In: Proceedings of the 10th international conference on learning representations. 
https:\/\/openreview.net\/forum?id=CVfLvQq9gLo"},{"key":"6372_CR170","unstructured":"Chen J, Lai H (2023) Ranking-aware uncertainty for text-guided image retrieval. arXiv:2308.08131"},{"key":"6372_CR171","unstructured":"Li J, Li D, Savarese S, Hoi S (2023) Blip-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: Proceedings of the international conference on machine learning, pp 19730\u201319742"},{"key":"6372_CR172","unstructured":"Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, Aleman FL, Almeida D, Altenschmidt J, Altman S, Anadkat S et al (2023) Gpt-4 technical report. arXiv:2303.08774"},{"key":"6372_CR173","unstructured":"Touvron H, Lavril T, Izacard G, Martinet X, Lachaux M-A, Lacroix T, Rozi\u00e8re B, Goyal N, Hambro E, Azhar F et al (2023) Llama: open and efficient foundation language models. arXiv:2302.13971"},{"key":"6372_CR174","unstructured":"Feng C-M, Bai Y, Luo T, Li Z, Khan S, Zuo W, Xu X, Goh RSM, Liu Y (2023) Vqa4cir: boosting composed image retrieval with visual question answering. arXiv:2312.12273"},{"key":"6372_CR175","unstructured":"Liu H, Li C, Wu Q, Lee YJ (2024) Visual instruction tuning. In: Proceedings of the advances in neural information processing systems, vol 36, pp 34892\u201334916"},{"key":"6372_CR176","unstructured":"Karthik S, Roth K, Mancini M, Akata Z (2023) Vision-by-language for training-free compositional image retrieval. arXiv:2310.09291"},{"key":"6372_CR177","doi-asserted-by":"crossref","unstructured":"Li Y, Ma F, Yang Y (2024) Imagine and seek: improving composed image retrieval with an imagined proxy. arXiv:2411.16752","DOI":"10.1109\/CVPR52734.2025.00377"},{"key":"6372_CR178","doi-asserted-by":"crossref","unstructured":"Yang Z, Xue D, Qian S, Dong W, Xu C (2024) Ldre: Llm-based divergent reasoning and ensemble for zero-shot composed image retrieval. 
In: Proceedings of the 47th International ACM SIGIR conference on research and development in information retrieval, pp 80\u201390","DOI":"10.1145\/3626772.3657740"},{"key":"6372_CR179","doi-asserted-by":"crossref","unstructured":"Saito K, Sohn K, Zhang X, Li C-L, Lee C-Y, Saenko K, Pfister T (2023) Pic2word: mapping pictures to words for zero-shot composed image retrieval. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 19305\u201319314","DOI":"10.1109\/CVPR52729.2023.01850"},{"key":"6372_CR180","doi-asserted-by":"crossref","unstructured":"Baldrati A, Agnolucci L, Bertini M, Del Bimbo A (2023) Zero-shot composed image retrieval with textual inversion. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 15338\u201315347","DOI":"10.1109\/ICCV51070.2023.01407"},{"key":"6372_CR181","unstructured":"Chen J, Lai H (2023) Pretrain like you inference: masked tuning improves zero-shot composed image retrieval. arXiv:2311.07622"},{"key":"6372_CR182","unstructured":"Li J, Li D, Xiong C, Hoi S (2022) Blip: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: Proceedings of the international conference on machine learning, pp 12888\u201312900"},{"key":"6372_CR183","doi-asserted-by":"crossref","unstructured":"Levy M, Ben-Ari R, Darshan N, Lischinski D (2024) Data roaming and quality assessment for composed image retrieval. In: Proceedings of the AAAI conference on artificial intelligence, vol 38, pp 2991\u20132999","DOI":"10.1609\/aaai.v38i4.28081"},{"key":"6372_CR184","doi-asserted-by":"crossref","unstructured":"Wen H, Song X, Chen X, Wei Y, Nie L, Chua T-S (2024) Simple but effective raw-data level multimodal fusion for composed image retrieval. 
In: Proceedings of the 47th International ACM SIGIR conference on research and development in information retrieval, pp 229\u2013239","DOI":"10.1145\/3626772.3657727"},{"key":"6372_CR185","doi-asserted-by":"crossref","unstructured":"Couairon G, Douze M, Cord M, Schwenk H (2022) Embedding arithmetic of multimodal queries for image retrieval. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 4950\u20134958","DOI":"10.1109\/CVPRW56347.2022.00542"},{"key":"6372_CR186","doi-asserted-by":"crossref","unstructured":"Wei H, Wang S, Xue Z, Chen S, Huang Q (2023) Conversational composed retrieval with iterative sequence refinement. In: Proceedings of the 31st ACM international conference on multimedia, pp 6390\u20136399","DOI":"10.1145\/3581783.3611885"},{"key":"6372_CR187","unstructured":"Rostamzadeh N, Hosseini S, Boquet T, Stokowiec W, Zhang Y, Jauvin C, Pal C (2018) Fashion-gen: The generative fashion dataset and challenge. arXiv:1806.08317"},{"key":"6372_CR188","doi-asserted-by":"crossref","unstructured":"Yang X, Zhang H, Jin D, Liu Y, Wu C-H, Tan J, Xie D, Wang J, Wang X (2020) Fashion captioning: Towards generating accurate descriptions with semantic rewards. In: Proceedings of the 16th european conference on computer vision, pp 1\u201317","DOI":"10.1007\/978-3-030-58601-0_1"},{"issue":"1","key":"6372_CR189","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1214\/aoms\/1177729694","volume":"22","author":"S Kullback","year":"1951","unstructured":"Kullback S, Leibler RA (1951) On information and sufficiency. Ann Math Stat 22(1):79\u201386","journal-title":"Ann Math Stat"},{"key":"6372_CR190","unstructured":"Oord Avd, Li Y, Vinyals O (2018) Representation learning with contrastive predictive coding. 
arXiv:1807.03748"},{"issue":"1","key":"6372_CR191","doi-asserted-by":"crossref","first-page":"20800","DOI":"10.1038\/s41598-022-25340-w","volume":"12","author":"Z Zhang","year":"2022","unstructured":"Zhang Z, Wang L, Cheng S (2022) Composed query image retrieval based on triangle area triple loss function and combining cnn with transformer. Sci Rep 12(1):20800","journal-title":"Sci Rep"},{"key":"6372_CR192","doi-asserted-by":"crossref","unstructured":"Schroff F, Kalenichenko D, Philbin J (2015) Facenet: a unified embedding for face recognition and clustering. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 815\u2013823","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"6372_CR193","doi-asserted-by":"crossref","unstructured":"Liu Z, Luo P, Qiu S, Wang X, Tang X (2016) Deepfashion: Powering robust clothes recognition and retrieval with rich annotations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1096\u20131104","DOI":"10.1109\/CVPR.2016.124"},{"key":"6372_CR194","doi-asserted-by":"crossref","unstructured":"Huang J, Feris RS, Chen Q, Yan S (2015) Cross-domain image retrieval with a dual attribute-aware ranking network. In: Proceedings of the IEEE international conference on computer vision, pp 1062\u20131070","DOI":"10.1109\/ICCV.2015.127"},{"issue":"1","key":"6372_CR195","doi-asserted-by":"crossref","first-page":"31","DOI":"10.17706\/IJCEE.2016.8.1.31-43","volume":"8","author":"S Zoghbi","year":"2016","unstructured":"Zoghbi S, Heyman G, Gomez JC, Moens M-F (2016) Fashion meets computer vision and nlp at e-commerce search. Int J Comput Electr Eng 8(1):31","journal-title":"Int J Comput Electr Eng"},{"key":"6372_CR196","doi-asserted-by":"crossref","unstructured":"Ak KE, Lim JH, Tham JY, Kassim AA (2018) Efficient multi-attribute similarity learning towards attribute-based fashion search. 
In: Proceedings of the IEEE winter conference on applications of computer vision, pp 1671\u20131679","DOI":"10.1109\/WACV.2018.00186"},{"key":"6372_CR197","doi-asserted-by":"crossref","unstructured":"Isola P, Lim JJ, Adelson EH (2015) Discovering states and transformations in image collections. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1383\u20131391","DOI":"10.1109\/CVPR.2015.7298744"},{"key":"6372_CR198","doi-asserted-by":"crossref","unstructured":"Forbes M, Kaeser-Chen C, Sharma P, Belongie S (2019) Neural naturalist: generating fine-grained image comparisons. In: Proceedings of the conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing, pp 708\u2013717","DOI":"10.18653\/v1\/D19-1065"},{"key":"6372_CR199","doi-asserted-by":"crossref","unstructured":"Jhamtani H, Berg-Kirkpatrick T (2018) Learning to describe differences between pairs of similar images. In: Proceedings of the conference on empirical methods in natural language processing, pp 4024\u20134034","DOI":"10.18653\/v1\/D18-1436"},{"key":"6372_CR200","doi-asserted-by":"crossref","unstructured":"Guo S, Huang W, Zhang X, Srikhanta P, Cui Y, Li Y, Adam H, Scott MR, Belongie S (2019) The imaterialist fashion attribute dataset. In: Proceedings of the international conference on computer vision workshop, pp 3113\u20133116","DOI":"10.1109\/ICCVW.2019.00377"},{"key":"6372_CR201","doi-asserted-by":"crossref","unstructured":"Liu Z, Luo P, Wang X, Tang X (2015) Deep learning face attributes in the wild. In: Proceedings of the IEEE international conference on computer vision, pp 3730\u20133738","DOI":"10.1109\/ICCV.2015.425"},{"key":"6372_CR202","doi-asserted-by":"crossref","unstructured":"Dong H, Wang Z, Qiu Q, Sapiro G (2021) Using text to teach image retrieval. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 1643\u20131652","DOI":"10.1109\/CVPRW53098.2021.00180"},{"key":"6372_CR203","doi-asserted-by":"crossref","unstructured":"Suo Y, Ma F, Zhu L, Yang Y (2024) Knowledge-enhanced dual-stream zero-shot composed image retrieval. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 26951\u201326962","DOI":"10.1109\/CVPR52733.2024.02545"},{"key":"6372_CR204","doi-asserted-by":"crossref","unstructured":"Zhang X, Zheng Z, Zhu L, Yang Y (2024) Collaborative group: composed image retrieval via consensus learning from noisy annotations. Knowledge-Based Syst:112135","DOI":"10.1016\/j.knosys.2024.112135"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06372-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-06372-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06372-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T19:32:34Z","timestamp":1758310354000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-06372-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,27]]},"references-count":204,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["6372"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-06372-x","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts"
:[[2025,2,27]]},"assertion":[{"value":"14 February 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 February 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they comply with ethical and informed consent for the data used.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"482"}}