{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:10:37Z","timestamp":1776888637506,"version":"3.51.2"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031733369","type":"print"},{"value":"9783031733376","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73337-6_15","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T23:02:27Z","timestamp":1730329347000},"page":"258-274","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["FreestyleRet: Retrieving Images from\u00a0Style-Diversified Queries"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3200-0270","authenticated-orcid":false,"given":"Hao","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6966-1512","authenticated-orcid":false,"given":"Yanhao","family":"Jia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9287-6410","authenticated-orcid":false,"given":"Peng","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1885-4261","authenticated-orcid":false,"given":"Zesen","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kehan","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jialu","family":"Sui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6747-0646","authenticated-orcid":false,"given":"Chang","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2120-5588","authenticated-orcid":false,"given":"Li","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"issue":"10","key":"15_CR1","doi-asserted-by":"publisher","first-page":"2303","DOI":"10.1109\/TPAMI.2017.2753232","volume":"40","author":"Y Aytar","year":"2017","unstructured":"Aytar, Y., Castrejon, L., Vondrick, C., Pirsiavash, H., Torralba, A.: Cross-modal scene networks. IEEE Trans. Pattern Anal. Mach. Intell. 40(10), 2303\u20132314 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Bossett, D., et al.: Emotion-based style transfer on visual art using gram matrices. In: 2021 IEEE MIT Undergraduate Research Technology Conference (URTC), pp.\u00a01\u20135. IEEE (2021)","DOI":"10.1109\/URTC54388.2021.9701611"},{"key":"15_CR3","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR4","unstructured":"Cheng, X., Zhang, N., Yu, J., Wang, Y., Li, G., Zhang, J.: Null-space diffusion sampling for zero-shot point cloud completion"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Chowdhury, P.N., Bhunia, A.K., Sain, A., Koley, S., Xiang, T., Song, Y.Z.: Scenetrilogy: on human scene-sketch and its complementarity with photo and text. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10972\u201310983 (2023)","DOI":"10.1109\/CVPR52729.2023.01056"},{"key":"15_CR6","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/978-3-031-20074-8_15","volume-title":"ECCV 2022","author":"PN Chowdhury","year":"2022","unstructured":"Chowdhury, P.N., Sain, A., Bhunia, A.K., Xiang, T., Gryaditskaya, Y., Song, Y.Z.: FS-COCO: towards understanding of freehand sketches of common objects in context. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13668, pp. 253\u2013270. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20074-8_15"},{"issue":"2","key":"15_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1348246.1348248","volume":"40","author":"R Datta","year":"2008","unstructured":"Datta, R., Joshi, D., Li, J., Wang, J.Z.: Image retrieval: ideas, influences, and trends of the new age. ACM Comput. Surv. (CSUR) 40(2), 1\u201360 (2008)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"15_CR9","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"15_CR10","doi-asserted-by":"crossref","unstructured":"Farahani, A., Voghoei, S., Rasheed, K., Arabnia, H.R.: A brief review of domain adaptation. In: Advances in Data Science and Information Engineering: Proceedings from ICDATA 2020 and IKE 2020, pp. 877\u2013894 (2021)","DOI":"10.1007\/978-3-030-71704-9_65"},{"key":"15_CR11","unstructured":"Guo, Y., et al.: AnimateDiff: animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725 (2023)"},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778, June 2016","DOI":"10.1109\/CVPR.2016.90"},{"issue":"3","key":"15_CR13","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1016\/j.eij.2015.06.005","volume":"16","author":"FO Isinkaye","year":"2015","unstructured":"Isinkaye, F.O., Folajimi, Y.O., Ojokoh, B.A.: Recommendation systems: principles, methods and evaluation. Egypt. Inform. J. 16(3), 261\u2013273 (2015)","journal-title":"Egypt. Inform. J."},{"key":"15_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"709","DOI":"10.1007\/978-3-031-19827-4_41","volume-title":"ECCV 2022","author":"M Jia","year":"2022","unstructured":"Jia, M., et al.: Visual prompt tuning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13693, pp. 709\u2013727. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19827-4_41"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Jin, P., et al.: Text-video retrieval with disentangled conceptualization and set-to-set alignment. arXiv preprint arXiv:2305.12218 (2023)","DOI":"10.24963\/ijcai.2023\/104"},{"key":"15_CR16","doi-asserted-by":"crossref","unstructured":"Jin, P., et al.: DiffusionRet: generative text-video retrieval with diffusion model. arXiv preprint arXiv:2303.09867 (2023)","DOI":"10.1109\/ICCV51070.2023.00234"},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"issue":"4","key":"15_CR18","doi-asserted-by":"publisher","first-page":"1090","DOI":"10.1109\/TMM.2014.2305633","volume":"16","author":"M Kafai","year":"2014","unstructured":"Kafai, M., Eshghi, K., Bhanu, B.: Discrete cosine transform locality-sensitive hashes for face retrieval. IEEE Trans. Multimedia 16(4), 1090\u20131103 (2014)","journal-title":"IEEE Trans. Multimedia"},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"Khattak, M.U., Rasheed, H., Maaz, M., Khan, S., Khan, F.S.: Maple: multi-modal prompt learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19113\u201319122 (2023)","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"15_CR21","doi-asserted-by":"publisher","first-page":"3367","DOI":"10.1109\/TIP.2023.3276570","volume":"32","author":"H Li","year":"2023","unstructured":"Li, H., Huang, J., Jin, P., Song, G., Wu, Q., Chen, J.: Weakly-supervised 3D spatial reasoning for text-based visual question answering. IEEE Trans. Image Process. 32, 3367\u20133382 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"15_CR22","doi-asserted-by":"crossref","unstructured":"Li, H., Li, X., Karimi, B., Chen, J., Sun, M.: Joint learning of object graph and relation graph for visual question answering. In: 2022 IEEE International Conference on Multimedia and Expo (ICME), pp. 01\u201306. IEEE (2022)","DOI":"10.1109\/ICME52920.2022.9859766"},{"key":"15_CR23","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)"},{"key":"15_CR24","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: International Conference on Machine Learning, pp. 12888\u201312900. PMLR (2022)"},{"key":"15_CR25","doi-asserted-by":"crossref","unstructured":"Li, X.L., Liang, P.: Prefix-tuning: optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"15_CR26","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1016\/j.neucom.2020.07.139","volume":"452","author":"X Li","year":"2021","unstructured":"Li, X., Yang, J., Ma, J.: Recent developments of content-based image retrieval (CBIR). Neurocomputing 452, 675\u2013689 (2021)","journal-title":"Neurocomputing"},{"key":"15_CR27","unstructured":"Li, Y., Fang, C., Yang, J., Wang, Z., Lu, X., Yang, M.H.: Universal style transfer via feature transforms. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"15_CR28","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: P-tuning: prompt tuning can be comparable to fine-tuning across scales and tasks. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 61\u201368 (2022)","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: P-tuning v2: prompt tuning can be comparable to fine-tuning universally across scales and tasks. arXiv preprint arXiv:2110.07602 (2021)","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"15_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Hierarchical prompt learning for multi-task learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10888\u201310898 (2023)","DOI":"10.1109\/CVPR52729.2023.01048"},{"key":"15_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122844","volume":"243","author":"X Meng","year":"2024","unstructured":"Meng, X., Huang, J., Li, Z., Wang, C., Teng, S., Grau, A.: DedustGAN: unpaired learning for image dedusting based on retinex with GANs. Expert Syst. Appl. 243, 122844 (2024)","journal-title":"Expert Syst. Appl."},{"key":"15_CR32","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"15_CR33","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"15_CR34","doi-asserted-by":"crossref","unstructured":"Song, J., Song, Y.Z., Xiang, T., Hospedales, T.M.: Fine-grained image retrieval: the text\/sketch input dilemma. In: BMVC, vol.\u00a02, p.\u00a07 (2017)","DOI":"10.5244\/C.31.45"},{"key":"15_CR35","doi-asserted-by":"crossref","unstructured":"Tao, Y.: Image style transfer based on VGG neural network model. In: 2022 IEEE International Conference on Advances in Electrical Engineering and Computer Applications (AEECA), pp. 1475\u20131482. IEEE (2022)","DOI":"10.1109\/AEECA55500.2022.9918891"},{"key":"15_CR36","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1007\/s13735-012-0014-4","volume":"1","author":"B Thomee","year":"2012","unstructured":"Thomee, B., Lew, M.S.: Interactive search in image retrieval: a survey. Int. J. Multimed. Inf. Retr. 1, 71\u201386 (2012)","journal-title":"Int. J. Multimed. Inf. Retr."},{"key":"15_CR37","unstructured":"Van Der\u00a0Maaten, L.: Learning a parametric embedding by preserving local structure. In: Artificial Intelligence and Statistics, pp. 384\u2013391. PMLR (2009)"},{"key":"15_CR38","doi-asserted-by":"crossref","unstructured":"Wang, P., Li, Y., Vasconcelos, N.: Rethinking and improving the robustness of image style transfer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 124\u2013133 (2021)","DOI":"10.1109\/CVPR46437.2021.00019"},{"key":"15_CR39","unstructured":"Wang, Y., Yu, J., Zhang, J.: Zero-shot image restoration using denoising diffusion null-space model. arXiv preprint arXiv:2212.00490 (2022)"},{"key":"15_CR40","unstructured":"Wu, X., et al.: Human preference score V2: a solid benchmark for evaluating human preferences of text-to-image synthesis. arXiv preprint arXiv:2306.09341 (2023)"},{"key":"15_CR41","doi-asserted-by":"crossref","unstructured":"Xu, S., Pang, L., Shen, H., Cheng, X.: Match-prompt: improving multi-task generalization ability for neural text matching via prompt learning. In: Proceedings of the 31st ACM International Conference on Information & Knowledge Management, pp. 2290\u20132300 (2022)","DOI":"10.1145\/3511808.3557388"},{"key":"15_CR42","doi-asserted-by":"crossref","unstructured":"Yu, J., Wang, Y., Zhao, C., Ghanem, B., Zhang, J.: Freedom: training-free energy-guided conditional diffusion model. arXiv preprint arXiv:2303.09833 (2023)","DOI":"10.1109\/ICCV51070.2023.02118"},{"issue":"12","key":"15_CR43","doi-asserted-by":"publisher","first-page":"5586","DOI":"10.1109\/TKDE.2021.3070203","volume":"34","author":"Y Zhang","year":"2021","unstructured":"Zhang, Y., Yang, Q.: A survey on multi-task learning. IEEE Trans. Knowl. Data Eng. 34(12), 5586\u20135609 (2021)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"15_CR44","first-page":"4396","volume":"45","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Liu, Z., Qiao, Y., Xiang, T., Loy, C.C.: Domain generalization: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 45, 4396\u20134415 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR45","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Conditional prompt learning for vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16816\u201316825 (2022)","DOI":"10.1109\/CVPR52688.2022.01631"},{"issue":"9","key":"15_CR46","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vision 130(9), 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vision"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73337-6_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T23:05:32Z","timestamp":1730329532000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73337-6_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031733369","9783031733376"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73337-6_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}