{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,17]],"date-time":"2026-06-17T16:26:42Z","timestamp":1781713602317,"version":"3.54.5"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T00:00:00Z","timestamp":1738800000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T00:00:00Z","timestamp":1738800000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["42394064"],"award-info":[{"award-number":["42394064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["42394064"],"award-info":[{"award-number":["42394064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["42394064"],"award-info":[{"award-number":["42394064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["42394064"],"award-info":[{"award-number":["42394064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["42394064"],"award-info":[{"award-number":["42394064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["42394064"],"award-info":[{"award-number":["42394064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["42394064"],"award-info":[{"award-number":["42394064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s10994-024-06665-1","type":"journal-article","created":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T15:32:51Z","timestamp":1738855971000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Semantic-aware contrastive learning via multi-prompt alignment"],"prefix":"10.1007","volume":"114","author":[{"given":"Zhuoran","family":"Zhao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hao","family":"Qin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ming","family":"Kong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Luyuan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Di","family":"Xie","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiang","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qiang","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,2,6]]},"reference":[{"key":"6665_CR1","unstructured":"Bardes, A., Ponce, J., & LeCun, Y. (2021). Vicreg: Variance-invariance-covariance regularization for self-supervised learning. arXiv preprint arXiv:2105.04906"},{"key":"6665_CR2","unstructured":"Bochkovskiy, A., Wang, C.-Y., & Liao, H.-Y. M. (2020). Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934"},{"key":"6665_CR3","doi-asserted-by":"crossref","unstructured":"Caron, M., Touvron, H., Misra, I., J\u00e9gou, H., Mairal, J., Bojanowski, P., & Joulin, A. (2021). Emerging properties in self-supervised vision transformers. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660.","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"6665_CR4","first-page":"9912","volume":"33","author":"M Caron","year":"2020","unstructured":"Caron, M., Misra, I., Mairal, J., Goyal, P., Bojanowski, P., & Joulin, A. (2020). Unsupervised learning of visual features by contrasting cluster assignments. Advances in Neural Information Processing Systems, 33, 9912\u20139924.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6665_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., & He, K. (2021). Exploring simple siamese representation learning. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15750\u201315758.","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"6665_CR6","unstructured":"Chen, X., Fan, H., Girshick, R., & He, K. (2020). Improved baselines with momentum contrastive learning. arXiv preprint arXiv:2003.04297"},{"key":"6665_CR7","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G. (2020). A simple framework for contrastive learning of visual representations. In International Conference on Machine Learning, pp. 1597\u20131607. PMLR."},{"key":"6665_CR8","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed, S., & Vedaldi, A. (2014). Describing textures in the wild. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3606\u20133613.","DOI":"10.1109\/CVPR.2014.461"},{"key":"6665_CR9","unstructured":"Dunlap, L., Umino, A., Zhang, H., Yang, J., Gonzalez, J. E., & Darrell, T. (2024). Diversify your vision datasets with automatic diffusion-based augmentation. Advances in Neural Information Processing Systems, 36."},{"key":"6665_CR10","doi-asserted-by":"crossref","unstructured":"Gong, C., Wang, D., Li, M., Chandra, V., & Liu, Q. (2021). Keepaugment: A simple information-preserving data augmentation approach. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1055\u20131064.","DOI":"10.1109\/CVPR46437.2021.00111"},{"key":"6665_CR11","first-page":"21271","volume":"33","author":"J-B Grill","year":"2020","unstructured":"Grill, J.-B., Strub, F., Altch\u00e9, F., Tallec, C., Richemond, P., Buchatskaya, E., Doersch, C., Avila Pires, B., Guo, Z., Gheshlaghi Azar, M., et al. (2020). Bootstrap your own latent-a new approach to self-supervised learning. Advances in Neural Information Processing Systems, 33, 21271\u201321284.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6665_CR12","doi-asserted-by":"crossref","unstructured":"Guo, Y., Xu, M., Li, J., Ni, B., Zhu, X., Sun, Z., & Xu, Y. (2022). Hcsc: Hierarchical contrastive selective coding. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9706\u20139715","DOI":"10.1109\/CVPR52688.2022.00948"},{"key":"6665_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., & Girshick, R. (2020). Momentum contrast for unsupervised visual representation learning. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738.","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"6665_CR14","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6665_CR15","doi-asserted-by":"crossref","unstructured":"Hu, J., Huang, L., Ren, T., Zhang, S., Ji, R., & Cao, L. (2023). You only segment once: Towards real-time panoptic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17819\u201317829.","DOI":"10.1109\/CVPR52729.2023.01709"},{"key":"6665_CR16","doi-asserted-by":"crossref","unstructured":"Hu, Q., Wang, X., Hu, W., & Qi, G.-J. (2021). Adco: Adversarial contrast for efficient learning of unsupervised representations from self-trained negative adversaries. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1074\u20131083.","DOI":"10.1109\/CVPR46437.2021.00113"},{"key":"6665_CR17","unstructured":"Huang, S., Dong, L., Wang, W., Hao, Y., Singhal, S., Ma, S., Lv, T., Cui, L., Mohammed, O.K., Liu, Q., et al. (2023). Language is not all you need: Aligning perception with language models. arXiv preprint arXiv:2302.14045"},{"key":"6665_CR18","unstructured":"Jahanian, A., Puig, X., Tian, Y., & Isola, P. (2021). Generative models as a data source for multiview representation learning. arXiv preprint arXiv:2106.05258"},{"key":"6665_CR19","unstructured":"Kingma, D. P., & Welling, M. (2013). Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114"},{"key":"6665_CR20","doi-asserted-by":"crossref","unstructured":"Kisantal, M., Wojna, Z., Murawski, J., Naruniec, J., & Cho, K. (2019). Augmentation for small object detection. arXiv preprint arXiv:1902.07296","DOI":"10.5121\/csit.2019.91713"},{"key":"6665_CR21","doi-asserted-by":"crossref","unstructured":"Kumar, T., Turab, M., Raj, K., Mileo, A., Brennan, R., & Bendechache, M. (2023). Advanced data augmentation approaches: A comprehensive survey and future directions. arXiv preprint arXiv:2301.02830.","DOI":"10.1109\/ACCESS.2024.3470122"},{"key":"6665_CR22","unstructured":"Li, J., Li, D., Savarese, S., & Hoi, S. (2023). Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597"},{"key":"6665_CR23","doi-asserted-by":"crossref","unstructured":"Li, Z., Zhu, Y., Yang, F., Li, W., Zhao, C., Chen, Y., Chen, Z., Xie, J., Wu, L., Zhao, R., et al. (2022). Univip: A unified framework for self-supervised visual pre-training. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14627\u201314636.","DOI":"10.1109\/CVPR52688.2022.01422"},{"key":"6665_CR24","unstructured":"Maaten, L., & Hinton, G. (2008). Visualizing data using t-sne. Journal of machine learning research, 9(11)."},{"key":"6665_CR25","doi-asserted-by":"crossref","unstructured":"Nilsback, M.-E., & Zisserman, A. (2008). Automated flower classification over a large number of classes. In 2008 Sixth Indian Conference on Computer Vision, Graphics & Image Processing, pp. 722\u2013729. IEEE.","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"6665_CR26","doi-asserted-by":"crossref","unstructured":"Parkhi, O. M., Vedaldi, A., Zisserman, A., & Jawahar, C. (2012). Cats and dogs. In 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3498\u20133505. IEEE.","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"6665_CR27","unstructured":"Radford, A., Kim, J. W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al. (2021). Learning transferable visual models from natural language supervision. In International Conference on Machine Learning, pp. 8748\u20138763. PMLR."},{"issue":"14","key":"6665_CR28","first-page":"71","volume":"8","author":"MDM Reddy","year":"2021","unstructured":"Reddy, M. D. M., Basha, M. S. M., Hari, M. M. C., & Penchalaiah, M. N. (2021). Dall-e: Creating images from text. UGC Care Group I Journal, 8(14), 71\u201375.","journal-title":"UGC Care Group I Journal"},{"key":"6665_CR29","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., & Ommer, B. (2022). High-resolution image synthesis with latent diffusion models. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"6665_CR30","doi-asserted-by":"crossref","unstructured":"Sariyildiz, M. B., Alahari, K., Larlus, D., & Kalantidis, Y. (2023). Fake it till you make it: Learning transferable representations from synthetic imagenet clones. In CVPR 2023\u2013IEEE\/CVF Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR52729.2023.00774"},{"key":"6665_CR31","doi-asserted-by":"crossref","unstructured":"Song, K., Zhang, S., Luo, Z., Wang, T., & Xie, J. (2023). Semantics-consistent feature search for self-supervised visual representation learning. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16099\u201316108.","DOI":"10.1109\/ICCV51070.2023.01475"},{"key":"6665_CR32","unstructured":"Tian, Y., Fan, L., Isola, P., Chang, H., & Krishnan, D. (2023). Stablerep: Synthetic images from text-to-image models make strong visual representation learners. arXiv preprint arXiv:2306.00984"},{"key":"6665_CR33","unstructured":"Tian, Y., Fan, L., Isola, P., Chang, H., & Krishnan, D. (2024). Stablerep: Synthetic images from text-to-image models make strong visual representation learners. Advances in Neural Information Processing Systems, 36."},{"key":"6665_CR34","doi-asserted-by":"crossref","unstructured":"Tian, Y., Krishnan, D., & Isola, P. (2020). Contrastive multiview coding. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XI 16, pp. 776\u2013794. Springer.","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"6665_CR35","unstructured":"Touvron, H., Lavril, T., Izacard, G., Martinet, X., Lachaux, M.-A., Lacroix, T., Rozi\u00e8re, B., Goyal, N., Hambro, E., Azhar, F., et al. (2023). Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971"},{"key":"6665_CR36","unstructured":"Wang, H., Ge, S., Lipton, Z., & Xing, E. P. (2019). Learning robust global representations by penalizing local predictive power. In Advances in Neural Information Processing Systems, pp. 10506\u201310518."},{"key":"6665_CR37","unstructured":"Xinlei, C., Saining, X., & Kaiming, H. (2021). An empirical study of training self-supervised visual transformers. arXiv preprint arXiv:2104.020578, 7."},{"key":"6665_CR38","doi-asserted-by":"crossref","unstructured":"Xu, K., Ye, F., Zhong, Q., & Xie, D. (2022). Topology-aware convolutional neural network for efficient skeleton-based action recognition. In Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 2866\u20132874.","DOI":"10.1609\/aaai.v36i3.20191"},{"key":"6665_CR39","unstructured":"Yang, L., Xu, X., Kang, B., Shi, Y., & Zhao, H. (2024). Freemask: Synthetic images with dense annotations make stronger segmentation models. Advances in Neural Information Processing Systems, 36."},{"key":"6665_CR40","unstructured":"Zbontar, J., Jing, L., Misra, I., LeCun, Y., & Deny, S. (2021). Barlow twins: Self-supervised learning via redundancy reduction. In International Conference on Machine Learning, pp. 12310\u201312320. PMLR."},{"key":"6665_CR41","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., & Agrawala, M. (2023). Adding conditional control to text-to-image diffusion models. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847.","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"6665_CR42","doi-asserted-by":"crossref","unstructured":"Zhu, J.-Y., Park, T., Isola, P., & Efros, A. A. (2017). Unpaired image-to-image translation using cycle-consistent adversarial networks. In Proceedings of the IEEE International Conference on Computer Vision, pp. 2223\u20132232.","DOI":"10.1109\/ICCV.2017.244"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06665-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-024-06665-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06665-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T01:02:34Z","timestamp":1770339754000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-024-06665-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,6]]},"references-count":42,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["6665"],"URL":"https:\/\/doi.org\/10.1007\/s10994-024-06665-1","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,6]]},"assertion":[{"value":"28 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 December 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 February 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"None.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}],"article-number":"63"}}