{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T01:25:45Z","timestamp":1773105945344,"version":"3.50.1"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031431470","type":"print"},{"value":"9783031431487","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43148-7_21","type":"book-chapter","created":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T20:48:35Z","timestamp":1693860515000},"page":"245-256","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["OpenFashionCLIP: Vision-and-Language Contrastive Learning with\u00a0Open-Source Fashion Data"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5590-3253","authenticated-orcid":false,"given":"Giuseppe","family":"Cartella","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5012-5800","authenticated-orcid":false,"given":"Alberto","family":"Baldrati","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7918-6220","authenticated-orcid":false,"given":"Davide","family":"Morelli","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9640-9385","authenticated-orcid":false,"given":"Marcella","family":"Cornia","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1364-218X","authenticated-orcid":false,"given":"Marco","family":"Bertini","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2239-283X","authenticated-orcid":false,"given":"Rita","family":"Cucchiara","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,5]]},"reference":[{"key":"21_CR1","unstructured":"Aggarwal, P.: Fashion Product Images (Small). https:\/\/www.kaggle.com\/datasets\/paramaggarwal\/fashion-product-images-small"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Baldrati, A., Agnolucci, L., Bertini, M., Del Bimbo, A.: Zero-shot composed image retrieval with textual inversion. arXiv preprint arXiv:2303.15247 (2023)","DOI":"10.1109\/ICCV51070.2023.01407"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Baldrati, A., Bertini, M., Uricchio, T., Del Bimbo, A.: Conditioned image retrieval for fashion using contrastive learning and CLIP-based features. In: ACM Multimedia Asia (2021)","DOI":"10.1145\/3469877.3493593"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Baldrati, A., Bertini, M., Uricchio, T., Del Bimbo, A.: Conditioned and composed image retrieval combining and partially fine-tuning CLIP-based features. In: CVPR Workshops (2022)","DOI":"10.1109\/CVPRW56347.2022.00543"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Baldrati, A., Morelli, D., Cartella, G., Cornia, M., Bertini, M., Cucchiara, R.: Multimodal garment designer: human-centric latent diffusion models for fashion image editing. arXiv preprint arXiv:2304.02051 (2023)","DOI":"10.1109\/ICCV51070.2023.02138"},{"key":"21_CR6","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: NeurIPS (2020)"},{"key":"21_CR7","unstructured":"Chen, T., Xu, B., Zhang, C., Guestrin, C.: Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174 (2016)"},{"issue":"1","key":"21_CR8","doi-asserted-by":"publisher","first-page":"18958","DOI":"10.1038\/s41598-022-23052-9","volume":"12","author":"PJ Chia","year":"2022","unstructured":"Chia, P.J., et al.: Contrastive language and vision learning of general fashion concepts. Sci. Rep. 12(1), 18958 (2022)","journal-title":"Sci. Rep."},{"key":"21_CR9","unstructured":"Cornia, M., Baraldi, L., Fiameni, G., Cucchiara, R.: Universal captioner: inducing content-style separation in vision-and-language model training. arXiv preprint arXiv:2111.12727 (2022)"},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Cucurull, G., Taslakian, P., Vazquez, D.: Context-aware visual compatibility prediction. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01290"},{"issue":"1s","key":"21_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3531017","volume":"19","author":"L De Divitiis","year":"2023","unstructured":"De Divitiis, L., Becattini, F., Baecchi, C., Del Bimbo, A.: Disentangling features for fashion recommendation. ACM TOMM 19(1s), 1\u201321 (2023)","journal-title":"ACM TOMM"},{"key":"21_CR12","doi-asserted-by":"crossref","unstructured":"Dong, H., et al.: Fashion editing with adversarial parsing learning. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00814"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Fenocchi, E., Morelli, D., Cornia, M., Baraldi, L., Cesari, F., Cucchiara, R.: Dual-branch collaborative transformer for virtual try-on. In: CVPR Workshops (2022)","DOI":"10.1109\/CVPRW56347.2022.00246"},{"issue":"2","key":"21_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3491226","volume":"18","author":"M Fincato","year":"2022","unstructured":"Fincato, M., Cornia, M., Landi, F., Cesari, F., Cucchiara, R.: Transform, warp, and dress: a new transformation-guided model for virtual try-on. ACM TOMM 18(2), 1\u201324 (2022)","journal-title":"ACM TOMM"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Fincato, M., Landi, F., Cornia, M., Cesari, F., Cucchiara, R.: VITON-GT: an image-based virtual try-on model with geometric transformations. In: ICPR (2021)","DOI":"10.1109\/ICPR48806.2021.9412052"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Gao, T., Fisch, A., Chen, D.: Making pre-trained language models better few-shot learners. In: ACL (2021)","DOI":"10.18653\/v1\/2021.acl-long.295"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"Guo, S., et al.: The iMaterialist fashion attribute dataset. In: ICCV Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00377"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Hadi Kiapour, M., Han, X., Lazebnik, S., Berg, A.C., Berg, T.L.: Where to buy it: matching street clothing photos in online shops. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.382"},{"key":"21_CR19","doi-asserted-by":"publisher","unstructured":"Han, X., Yu, L., Zhu, X., Zhang, L., Song, Y.Z., Xiang, T.: FashionViL: fashion-focused vision-and-language representation learning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV. LNCS, vol. 13695, pp. 634\u2013651. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19833-5_37","DOI":"10.1007\/978-3-031-19833-5_37"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Han, X., et al.: Automatic spatially-aware fashion concept discovery. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.163"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Hsiao, W.L., Grauman, K.: Creating capsule wardrobes from fashion images. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00748"},{"key":"21_CR22","doi-asserted-by":"publisher","unstructured":"Ilharco, G., et al.: OpenCLIP (2021). https:\/\/doi.org\/10.5281\/zenodo.5143773","DOI":"10.5281\/zenodo.5143773"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Kuang, Z., et al.: Fashion retrieval via graph reasoning networks on a similarity pyramid. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00316"},{"key":"21_CR24","doi-asserted-by":"publisher","unstructured":"Lee, S., Gu, G., Park, S., Choi, S., Choo, J.: High-resolution virtual try-on with misalignment and occlusion-handled conditions. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV. LNCS, vol. 13677, pp. 204\u2013219. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19790-1_13","DOI":"10.1007\/978-3-031-19790-1_13"},{"key":"21_CR25","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: ICML (2022)"},{"key":"21_CR26","unstructured":"Li, Y., et al.: Supervision exists everywhere: a data efficient contrastive language-image pre-training paradigm. In: ICLR (2022)"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Liu, Z., Luo, P., Qiu, S., Wang, X., Tang, X.: DeepFashion: powering robust clothes recognition and retrieval with rich annotations. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.124"},{"key":"21_CR28","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2019)"},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Majithia, S., Parameswaran, S.N., Babar, S., Garg, V., Srivastava, A., Sharma, A.: Robust 3D garment digitization from monocular 2D images for 3D virtual try-on systems. In: WACV (2022)","DOI":"10.1109\/WACV51458.2022.00148"},{"issue":"3","key":"21_CR30","doi-asserted-by":"publisher","first-page":"1286","DOI":"10.3390\/s23031286","volume":"23","author":"N Moratelli","year":"2023","unstructured":"Moratelli, N., Barraco, M., Morelli, D., Cornia, M., Baraldi, L., Cucchiara, R.: Fashion-oriented image captioning with external knowledge retrieval and fully attentive gates. Sensors 23(3), 1286 (2023)","journal-title":"Sensors"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Morelli, D., Baldrati, A., Cartella, G., Cornia, M., Bertini, M., Cucchiara, R.: LaDI-VTON: latent diffusion textual-inversion enhanced virtual try-on. arXiv preprint arXiv:2305.13501 (2023)","DOI":"10.1145\/3581783.3612137"},{"key":"21_CR32","unstructured":"Morelli, D., Cornia, M., Cucchiara, R.: FashionSearch++: improving consumer-to-shop clothes retrieval with hard negatives. In: CEUR Workshop Proceedings (2021)"},{"key":"21_CR33","doi-asserted-by":"publisher","unstructured":"Morelli, D., Fincato, M., Cornia, M., Landi, F., Cesari, F., Cucchiara, R.: Dress code: high-resolution multi-category virtual try-on. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV. LNCS, vol. 13668, pp. 345\u2013362. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20074-8_20","DOI":"10.1007\/978-3-031-20074-8_20"},{"key":"21_CR34","unstructured":"Pernu\u0161, M., Fookes, C., \u0160truc, V., Dobri\u0161ek, S.: FICE: text-conditioned fashion image editing with guided GAN inversion. arXiv preprint arXiv:2301.02110 (2023)"},{"key":"21_CR35","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML (2021)"},{"key":"21_CR36","unstructured":"Rostamzadeh, N., et al.: Fashion-gen: the generative fashion dataset and challenge. arXiv preprint arXiv:1806.08317 (2018)"},{"key":"21_CR37","unstructured":"Santesteban, I., Otaduy, M., Thuerey, N., Casas, D.: ULNeF: untangled layered neural fields for mix-and-match virtual try-on. In: NeurIPS (2022)"},{"key":"21_CR38","doi-asserted-by":"crossref","unstructured":"Santesteban, I., Thuerey, N., Otaduy, M.A., Casas, D.: Self-supervised collision handling via generative 3D garment models for virtual try-on. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01159"},{"key":"21_CR39","doi-asserted-by":"crossref","unstructured":"Sarkar, R., et al.: OutfitTransformer: learning outfit representations for fashion recommendation. In: WACV (2023)","DOI":"10.1109\/WACV56688.2023.00359"},{"key":"21_CR40","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. In: NeurIPS (2022)"},{"key":"21_CR41","unstructured":"Schuhmann, C., et al.: LAION-400M: open dataset of CLIP-filtered 400 million image-text pairs. In: NeurIPS Workshops (2021)"},{"key":"21_CR42","doi-asserted-by":"crossref","unstructured":"Shiau, R., et al.: Shop the look: building a large scale visual shopping system at Pinterest. In: KDD (2020)","DOI":"10.1145\/3394486.3403372"},{"key":"21_CR43","doi-asserted-by":"crossref","unstructured":"Wortsman, M., et al.: Robust fine-tuning of zero-shot models. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"21_CR44","doi-asserted-by":"crossref","unstructured":"Wu, H., et al.: Fashion IQ: a new dataset towards retrieving images by natural language feedback. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01115"},{"key":"21_CR45","unstructured":"Xiao, H., Rasul, K., Vollgraf, R.: Fashion-MNIST: a novel image dataset for benchmarking machine learning algorithms. arXiv preprint arXiv:1708.07747 (2017)"},{"key":"21_CR46","doi-asserted-by":"crossref","unstructured":"Xie, Z., et al.: GP-VTON: towards general purpose virtual try-on via collaborative local-flow global-parsing learning. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02255"},{"key":"21_CR47","unstructured":"Yao, L., et al.: FILIP: fine-grained interactive language-image pre-training. In: ICLR (2022)"},{"key":"21_CR48","doi-asserted-by":"crossref","unstructured":"Zhai, A., Wu, H.Y., Tzeng, E., Park, D.H., Rosenberg, C.: Learning a unified embedding for visual search at Pinterest. In: KDD (2019)","DOI":"10.1145\/3292500.3330739"},{"key":"21_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Visual search at Alibaba. In: KDD (2018)","DOI":"10.1145\/3219819.3219820"},{"key":"21_CR50","doi-asserted-by":"crossref","unstructured":"Zhuge, M., et al.: Kaleido-BERT: vision-language pre-training on fashion domain. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01246"}],"container-title":["Lecture Notes in Computer Science","Image Analysis and Processing \u2013 ICIAP 2023"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43148-7_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T16:38:45Z","timestamp":1710261525000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43148-7_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031431470","9783031431487"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43148-7_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"5 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Image Analysis and Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Udine","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iciap2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iciap2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"144","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"85","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"59% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"https:\/\/iciap2023.org\/satellite-event\/workshops\/","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}