{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T05:26:22Z","timestamp":1749014782920,"version":"3.40.3"},"publisher-location":"Cham","reference-count":106,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031731129"},{"type":"electronic","value":"9783031731136"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73113-6_18","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T08:43:51Z","timestamp":1732092231000},"page":"306-325","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["GenView: Enhancing View Quality with\u00a0Pretrained Generative Model for\u00a0Self-Supervised Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6449-2727","authenticated-orcid":false,"given":"Xiaojie","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0530-7231","authenticated-orcid":false,"given":"Yibo","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0550-8247","authenticated-orcid":false,"given":"Xiangtai","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0247-5221","authenticated-orcid":false,"given":"Jianlong","family":"Wu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9865-2212","authenticated-orcid":false,"given":"Yue","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5534-587X","authenticated-orcid":false,"given":"Bernard","family":"Ghanem","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3895-5510","authenticated-orcid":false,"given":"Min","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"key":"18_CR1","unstructured":"Asano, Y.M., Rupprecht, C., Vedaldi, A.: Self-labelling via simultaneous clustering and representation learning. In: ICLR (2020)"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Assran, M., et al.: Self-supervised learning from images with a joint-embedding predictive architecture. In: CVPR, pp. 15619\u201315629. IEEE (2023)","DOI":"10.1109\/CVPR52729.2023.01499"},{"key":"18_CR3","unstructured":"Astolfi, P., Casanova, A., Verbeek, J., Vincent, P., Romero-Soriano, A., Drozdzal, M.: Instance-conditioned gan data augmentation for representation learning. arXiv preprint arXiv:2303.09677 (2023)"},{"key":"18_CR4","unstructured":"Bao, H., Dong, L., Piao, S., Wei, F.: Beit: Bert pre-training of image transformers. In: ICLR (2021)"},{"key":"18_CR5","unstructured":"Bie, F., et\u00a0al.: Renaissance: A survey into ai text-to-image generation in the era of large model. 
arXiv preprint arXiv:2309.00810 (2023)"},{"key":"18_CR6","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale gan training for high fidelity natural image synthesis. In: ICLR (2018)"},{"key":"18_CR7","unstructured":"Burg, M.F., et al.: A data augmentation perspective on diffusion models and retrieval. arXiv preprint arXiv:2304.10253 (2023)"},{"key":"18_CR8","doi-asserted-by":"publisher","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: ECCV, pp. 213\u2013229. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"18_CR9","unstructured":"Carlini, N., et al.: Extracting training data from diffusion models. In: USENIX Security, pp. 5253\u20135270. USENIX Association (2023)"},{"key":"18_CR10","doi-asserted-by":"publisher","unstructured":"Caron, M., Bojanowski, P., Joulin, A., Douze, M.: Deep clustering for unsupervised learning of visual features. In: ECCV, pp. 132\u2013149. Springer (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_9","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"18_CR11","unstructured":"Caron, M., Misra, I., Mairal, J., Goyal, P., Bojanowski, P., Joulin, A.: Unsupervised learning of visual features by contrasting cluster assignments. In: NeurIPS, pp. 9912\u20139924. MIT Press (2020)"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Caron, M., Touvron, H., Misra, I., J\u00e9gou, H., Mairal, J., Bojanowski, P., Joulin, A.: Emerging properties in self-supervised vision transformers. In: ICCV, pp. 9650\u20139660. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Changpinyo, S., Sharma, P., Ding, N., Soricut, R.: Conceptual 12m: pushing web-scale image-text pre-training to recognize long-tail visual concepts. In: CVPR, pp. 3558\u20133568. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.00356"},{"issue":"1","key":"18_CR14","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s44267-023-00029-4","volume":"1","author":"J Chen","year":"2023","unstructured":"Chen, J., Gao, C., Sun, L., Sang, N.: Ccsd: cross-camera self-distillation for unsupervised person re-identification. Visual Intell. 1(1), 27 (2023)","journal-title":"Visual Intell."},{"key":"18_CR15","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: ICML, pp. 1597\u20131607. PMLR (2020)"},{"key":"18_CR16","unstructured":"Chen, X., Fan, H., Girshick, R., He, K.: Improved baselines with momentum contrastive learning. In: arXiv preprint arXiv:2003.04297 (2020)"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Chen, X., He, K.: Exploring simple siamese representation learning. In: CVPR, pp. 15750\u201315758. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Chen, X., Xie, S., He, K.: An empirical study of training self-supervised vision transformers. In: ICCV, pp. 9640\u20139649. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: CVPR, pp. 248\u2013255. 
IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"18_CR20","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. In: ICLR (2020)"},{"key":"18_CR21","unstructured":"Dunlap, L., Umino, A., Zhang, H., Yang, J., Gonzalez, J.E., Darrell, T.: Diversify your vision datasets with automatic diffusion-based augmentation. In: NeurIPS, pp. 79024\u201379034. MIT Press (2023)"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Dwibedi, D., Aytar, Y., Tompson, J., Sermanet, P., Zisserman, A.: With a little help from my friends: nearest-neighbor contrastive learning of visual representations. In: ICCV, pp. 9588\u20139597. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.00945"},{"key":"18_CR23","unstructured":"Ermolov, A., Siarohin, A., Sangineto, E., Sebe, N.: Whitening for self-supervised representation learning. In: ICML, pp. 3015\u20133024. PMLR (2021)"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Feng, C.M., Yu, K., Liu, Y., Khan, S., Zuo, W.: Diverse data augmentation with diffusions for effective test-time prompt tuning. In: ICCV, pp. 2704\u20132714. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.00255"},{"key":"18_CR25","unstructured":"Garrido, Q., Assran, M., Ballas, N., Bardes, A., Najman, L., LeCun, Y.: Learning and leveraging world models in visual representation learning. arXiv preprint arXiv:2403.00504 (2024)"},{"key":"18_CR26","unstructured":"Gidaris, S., Singh, P., Komodakis, N.: Unsupervised representation learning by predicting image rotations. In: ICLR (2018)"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: CVPR, pp. 580\u2013587. IEEE (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"18_CR28","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: NeurIPS, pp. 2672\u20132680. MIT Press (2014)"},{"key":"18_CR29","unstructured":"Grill, J.B., M., et\u00a0al.: Bootstrap your own latent: a new approach to self-supervised learning. In: NeurIPS, pp. 21271\u201321284. MIT Press (2020)"},{"key":"18_CR30","unstructured":"Han, L., et\u00a0al.: Constructive assimilation: Boosting contrastive learning performance through view generation strategies. arXiv preprint arXiv:2304.00601 (2023)"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: CVPR, pp. 16000\u201316009. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"18_CR32","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: CVPR, pp. 9729\u20139738. IEEE (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"18_CR33","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: ICCV, pp. 2961\u20132969. IEEE (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"18_CR34","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778. IEEE (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"18_CR35","unstructured":"He, R., et al.: Is synthetic data from generative models ready for image recognition? 
In: ICLR (2022)"},{"key":"18_CR36","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: NeurIPS, pp. 6840\u20136851. MIT Press (2020)"},{"key":"18_CR37","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. In: NeurIPS. MIT Press (2022)"},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"Huang, L., You, S., Zheng, M., Wang, F., Qian, C., Yamasaki, T.: Learning where to learn in cross-view self-supervised learning. In: CVPR, pp. 14451\u201314460. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01405"},{"issue":"4","key":"18_CR39","doi-asserted-by":"publisher","first-page":"2506","DOI":"10.1109\/TPAMI.2023.3336525","volume":"46","author":"Z Huang","year":"2024","unstructured":"Huang, Z., et al.: Contrastive masked autoencoders are stronger vision learners. TPAMI 46(4), 2506\u20132517 (2024)","journal-title":"TPAMI"},{"key":"18_CR40","unstructured":"Jahanian, A., Puig, X., Tian, Y., Isola, P.: Generative models as a data source for multiview representation learning. In: ICLR (2021)"},{"key":"18_CR41","unstructured":"Karras, T., Aittala, M., Hellsten, J., Laine, S., Lehtinen, J., Aila, T.: Training generative adversarial networks with limited data. In: NeurIPS, pp. 12104\u201312114. MIT Press (2020)"},{"key":"18_CR42","doi-asserted-by":"crossref","unstructured":"Kim, T., Das, D., Choi, S., Jeong, M., Yang, S., Yun, S., Kim, C.: Neural transformation network to generate diverse views for contrastive learning. In: CVPR, pp. 4901\u20134911. IEEE (2023)","DOI":"10.1109\/CVPRW59228.2023.00518"},{"key":"18_CR43","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational Bayes. In: ICLR (2014)"},{"key":"18_CR44","unstructured":"Krizhevsky, A., et\u00a0al.: Learning multiple layers of features from tiny images (2009)"},{"key":"18_CR45","unstructured":"Le, Y., Yang, X.: Tiny imagenet visual recognition challenge. In: CS 231N (2015)"},{"key":"18_CR46","unstructured":"Li, J., Zhou, P., Xiong, C., Socher, R., Hoi, S.C.: Prototypical contrastive learning of unsupervised representations. In: ICLR. PMLR (2020)"},{"key":"18_CR47","unstructured":"Li, X., et al.: Transformer-based visual segmentation: A survey. arXiv preprint arXiv:2304.2023 (2023)"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Omg-seg: Is one model good enough for all segmentation? In: CVPR, pp. 27948\u201327959. IEEE (2024)","DOI":"10.1109\/CVPR52733.2024.02640"},{"key":"18_CR49","doi-asserted-by":"crossref","unstructured":"Li, X., He, S., Wu, J., Yu, Y., Nie, L., Zhang, M.: Mask again: Masked knowledge distillation for masked video modeling. In: ACM MM, pp. 2221\u20132232. ACM (2023)","DOI":"10.1145\/3581783.3612129"},{"key":"18_CR50","doi-asserted-by":"publisher","unstructured":"Li, X., Wu, J., Fang, H., Liao, Y., Wang, F., Qian, C.: Local correlation consistency for knowledge distillation. In: ECCV, pp. 18\u201333. Springer (2020). https:\/\/doi.org\/10.1007\/978-3-030-58610-2_2","DOI":"10.1007\/978-3-030-58610-2_2"},{"key":"18_CR51","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Fine-grained key-value memory enhanced predictor for video representation learning. In: ACM MM, pp. 2264\u20132274. ACM (2023)","DOI":"10.1145\/3581783.3612131"},{"key":"18_CR52","doi-asserted-by":"publisher","unstructured":"Li, X., Yang, L., Song, Q., Zhou, F.: Detector-in-detector: Multi-level analysis for human-parts. In: ACCV, pp. 228\u2013240. Springer (2019). 
https:\/\/doi.org\/10.1007\/978-3-030-20890-5_15","DOI":"10.1007\/978-3-030-20890-5_15"},{"key":"18_CR53","doi-asserted-by":"publisher","unstructured":"Li, Z., Geng, Z., Kang, Z., Chen, W., Yang, Y.: Eliminating gradient conflict in reference-based line-art colorization. In: ECCV, pp. 579\u2013596. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-19790-1_35","DOI":"10.1007\/978-3-031-19790-1_35"},{"key":"18_CR54","unstructured":"Li, Z., Li, Y., Zhao, P., Song, R., Li, X., Yang, J.: Is synthetic data from diffusion models ready for knowledge distillation? arXiv preprint arXiv:2305.12954 (2023)"},{"key":"18_CR55","doi-asserted-by":"crossref","unstructured":"Li, Z., Zhou, Q., Zhang, X., Zhang, Y., Wang, Y., Xie, W.: Open-vocabulary object segmentation with diffusion models. In: ICCV, pp. 7667\u20137676. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.00705"},{"key":"18_CR56","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: CVPR, pp. 2117\u20132125. IEEE (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"18_CR57","doi-asserted-by":"publisher","unstructured":"Lin, T.Y., et al.: Microsoft coco: common objects in context. In: ECCV, pp. 740\u2013755. Springer (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"18_CR58","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: CVPR, pp. 3431\u20133440. IEEE (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"18_CR59","unstructured":"Loshchilov, I., Hutter, F.: SGDR: Stochastic gradient descent with warm restarts. In: ICLR (2017)"},{"key":"18_CR60","unstructured":"Luo, R., Wang, Y., Wang, Y.: Rethinking the effect of data augmentation in adversarial contrastive cearning. In: ICLR (2023)"},{"key":"18_CR61","unstructured":"Nichol, A.Q., et al.: Glide: Towards photorealistic image generation and editing with text-guided diffusion models. In: ICML, pp. 16784\u201316804. PMLR (2022)"},{"key":"18_CR62","doi-asserted-by":"publisher","unstructured":"Noroozi, M., Favaro, P.: Unsupervised learning of visual representations by solving jigsaw puzzles. In: ECCV, pp. 69\u201384. Springer (2016). https:\/\/doi.org\/10.1007\/978-3-319-46466-4_5","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"18_CR63","unstructured":"Oord, A.v.d., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)"},{"key":"18_CR64","doi-asserted-by":"crossref","unstructured":"Pathak, D., Krahenbuhl, P., Donahue, J., Darrell, T., Efros, A.A.: Context encoders: Feature learning by inpainting. In: CVPR, pp. 2536\u20132544. IEEE (2016)","DOI":"10.1109\/CVPR.2016.278"},{"key":"18_CR65","doi-asserted-by":"crossref","unstructured":"Peng, X., Wang, K., Zhu, Z., Wang, M., You, Y.: Crafting better contrastive views for siamese representation learning. In: CVPR, pp. 16031\u201316040. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01556"},{"issue":"4","key":"18_CR66","doi-asserted-by":"publisher","first-page":"2045","DOI":"10.1109\/TPAMI.2020.3029801","volume":"44","author":"GJ Qi","year":"2020","unstructured":"Qi, G.J., Zhang, L., Lin, F., Wang, X.: Learning generalized transformation equivariant representations via autoencoding transformations. 
TPAMI 44(4), 2045\u20132057 (2020)","journal-title":"TPAMI"},{"key":"18_CR67","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: ICML, pp. 8748\u20138763. PMLR (2021)"},{"key":"18_CR68","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"18_CR69","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A., Chen, M., Sutskever, I.: Zero-shot text-to-image generation. In: ICML, pp. 8821\u20138831. PMLR (2021)"},{"key":"18_CR70","unstructured":"Razavi, A., Van\u00a0den Oord, A., Vinyals, O.: Generating diverse high-fidelity images with vq-vae-2. In: NeurIPS, pp. 14866\u201314876. MIT Press (2019)"},{"key":"18_CR71","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-CNN: towards real-time object detection with region proposal networks. In: NeurIPS, pp. 91\u201399. MIT Press (2015)"},{"key":"18_CR72","unstructured":"Ridnik, T., Ben-Baruch, E., Noy, A., Zelnik-Manor, L.: Imagenet-21k pretraining for the masses. arXiv preprint arXiv:2104.10972 (2021)"},{"key":"18_CR73","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR, pp. 10684\u201310695. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"18_CR74","unstructured":"Saharia, C., et\u00a0al.: Photorealistic text-to-image diffusion models with deep language understanding. In: NeurIPS, pp. 36479\u201336494. MIT Press (2022)"},{"key":"18_CR75","doi-asserted-by":"crossref","unstructured":"Sariyildiz, M.B., Alahari, K., Larlus, D., Kalantidis, Y.: Fake it till you make it: learning transferable representations from synthetic imagenet clones. In: CVPR, pp. 8011\u20138021. IEEE (2023)","DOI":"10.1109\/CVPR52729.2023.00774"},{"key":"18_CR76","unstructured":"Schuhmann, C., et\u00a0al.: Laion-5b: an open large-scale dataset for training next generation image-text models. In: NeurIPS, pp. 25278\u201325294. MIT Press (2022)"},{"key":"18_CR77","unstructured":"Schuhmann, C., et al.: Laion-400m: open dataset of clip-filtered 400 million image-text pairs. In: NeurIPS. MIT Press (2021)"},{"key":"18_CR78","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Desai, K., Johnson, J., Naik, N.: Casting your model: learning to localize improves self-supervised representations. In: CVPR, pp. 11058\u201311067. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.01091"},{"key":"18_CR79","doi-asserted-by":"crossref","unstructured":"Shipard, J., Wiliem, A., Thanh, K.N., Xiang, W., Fookes, C.: Diversity is definitely needed: improving model-agnostic zero-shot classification via stable diffusion. In: CVPR, pp. 769\u2013778. IEEE (2023)","DOI":"10.1109\/CVPRW59228.2023.00084"},{"key":"18_CR80","unstructured":"Tamkin, A., Wu, M., Goodman, N.: Viewmaker networks: learning views for unsupervised representation learning. In: ICLR (2020)"},{"key":"18_CR81","unstructured":"Tian, Y., Fan, L., Isola, P., Chang, H., Krishnan, D.: Stablerep: synthetic images from text-to-image models make strong visual representation learners. In: NeurIPS, pp. 48382\u201348402. MIT Press (2023)"},{"key":"18_CR82","doi-asserted-by":"publisher","unstructured":"Tian, Y., Krishnan, D., Isola, P.: Contrastive multiview coding. In: ECCV, pp. 776\u2013794. Springer (2020). 
https:\/\/doi.org\/10.1007\/978-3-030-58621-8_45","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"18_CR83","unstructured":"Tian, Y., Sun, C., Poole, B., Krishnan, D., Schmid, C., Isola, P.: What makes for good views for contrastive learning? In: NeurIPS, pp. 6827\u20136839. MIT Press (2020)"},{"key":"18_CR84","unstructured":"Trabucco, B., Doherty, K., Gurinas, M., Salakhutdinov, R.: Effective data augmentation with diffusion models. In: ICLR (2023)"},{"key":"18_CR85","doi-asserted-by":"crossref","unstructured":"Vincent, P., Larochelle, H., Bengio, Y., Manzagol, P.A.: Extracting and composing robust features with denoising autoencoders. In: ICML, pp. 1096\u20131103. PMLR (2008)","DOI":"10.1145\/1390156.1390294"},{"key":"18_CR86","doi-asserted-by":"publisher","unstructured":"Wang, L., et al.: Head: Hetero-assists distillation for heterogeneous object detectors. In: ECCV, pp. 314\u2013331. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_19","DOI":"10.1007\/978-3-031-20077-9_19"},{"key":"18_CR87","doi-asserted-by":"crossref","unstructured":"Wang, R., Yang, Y., Tao, D.: Art-point: Improving rotation robustness of point cloud classifiers via adversarial rotation. In: CVPR, pp. 14371\u201314380. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01397"},{"key":"18_CR88","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, R., Shen, C., Kong, T., Li, L.: Dense contrastive learning for self-supervised visual pre-training. In: CVPR, pp. 3024\u20133033. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.00304"},{"key":"18_CR89","unstructured":"Wu, J., et al.: Deep comprehensive correlation mining for image clustering. In: CVPR, pp. 8150\u20138159. IEEE (2019)"},{"key":"18_CR90","doi-asserted-by":"crossref","unstructured":"Wu, J., et\u00a0al.: Towards language-driven video inpainting via multimodal large language models. In: CVPR, pp. 12501\u201312511 (2024)","DOI":"10.1109\/CVPR52733.2024.01188"},{"issue":"7","key":"18_CR91","doi-asserted-by":"publisher","first-page":"5092","DOI":"10.1109\/TPAMI.2024.3361862","volume":"46","author":"J Wu","year":"2024","unstructured":"Wu, J., et al.: Towards open vocabulary learning: a survey. TPAMI 46(7), 5092\u20135113 (2024)","journal-title":"TPAMI"},{"key":"18_CR92","unstructured":"Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2 (2019)"},{"key":"18_CR93","doi-asserted-by":"crossref","unstructured":"Wu, Z., Xiong, Y., Yu, S.X., Lin, D.: Unsupervised feature learning via non-parametric instance discrimination. In: CVPR, pp. 3733\u20133742. IEEE (2018)","DOI":"10.1109\/CVPR.2018.00393"},{"key":"18_CR94","doi-asserted-by":"crossref","unstructured":"Xiao, T., Reed, C.J., Wang, X., Keutzer, K., Darrell, T.: Region similarity representation learning. In: ICCV, pp. 10539\u201310548. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.01037"},{"key":"18_CR95","doi-asserted-by":"crossref","unstructured":"Xie, J., Li, W., Li, X., Liu, Z., Ong, Y.S., Loy, C.C.: Mosaicfusion: Diffusion models as data augmenters for large vocabulary instance segmentation. arXiv preprint arXiv:2309.13042 (2023)","DOI":"10.1007\/s11263-024-02223-3"},{"issue":"1","key":"18_CR96","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/s44267-024-00043-0","volume":"2","author":"X Xie","year":"2024","unstructured":"Xie, X., Wu, J., Liu, G., Lin, Z.: Sscnet: learning-based subspace clustering. Visual Intell. 
2(1), 11 (2024)","journal-title":"Visual Intell."},{"key":"18_CR97","unstructured":"Yang, Y., Wang, H., Yuan, H., Lin, Z.: Towards theoretically inspired neural initialization optimization. In: NeurIPS, pp. 18983\u201318995. MIT Press (2022)"},{"key":"18_CR98","unstructured":"Yang, Y., Cheung, W.Y., Liu, C., Ji, X.: Local manifold augmentation for multiview semantic consistency. arXiv preprint arXiv:2211.02798 (2022)"},{"key":"18_CR99","unstructured":"Ye-Bin, M., et al.: Exploiting synthetic data for data imbalance problems: baselines from a data perspective. arXiv preprint arXiv:2308.00994 (2023)"},{"key":"18_CR100","unstructured":"Zang, Z., et\u00a0al.: Boosting unsupervised contrastive learning using diffusion-based data augmentation from scratch. arXiv preprint arXiv:2309.07909 (2023)"},{"key":"18_CR101","unstructured":"Zbontar, J., Jing, L., Misra, I., LeCun, Y., Deny, S.: Barlow twins: Self-supervised learning via redundancy reduction. In: ICML, pp. 12310\u201312320. PMLR (2021)"},{"key":"18_CR102","unstructured":"Zhang, D.J., et al.: Free-atm: Exploring unsupervised learning on diffusion-generated images with free attention masks. arXiv preprint arXiv:2308.06739 (2023)"},{"key":"18_CR103","doi-asserted-by":"crossref","unstructured":"Zhang, L., Zhang, Y., Long, D., Xie, P., Zhang, M., Zhang, M.: A two-stage adaptation of large language models for text ranking. arXiv preprint arXiv:2311.16720 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.706"},{"key":"18_CR104","unstructured":"Zhang, Y., Zhou, D., Hooi, B., Wang, K., Feng, J.: Expanding small-scale datasets with guided imagination. In: NeurIPS, pp. 76558\u201376618. MIT Press (2023)"},{"key":"18_CR105","unstructured":"Zheng, M., et al.: Ressl: Relational self-supervised learning with weak augmentation. In: NeurIPS, pp. 2543\u20132555. MIT Press (2021)"},{"key":"18_CR106","unstructured":"Zhou, Y., Sahak, H., Ba, J.: Training on thin air: Improve image classification with generated data. 
arXiv preprint arXiv:2305.15316 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73113-6_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T10:10:53Z","timestamp":1732097453000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73113-6_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031731129","9783031731136"],"references-count":106,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73113-6_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}
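The record above is a Crossref REST API "work" response (note the `"status"`, `"message-type"`, and `"message"` envelope). As a minimal sketch of how such a record can be retrieved and read programmatically, the snippet below fetches the same DOI from the public `https://api.crossref.org/works/{DOI}` endpoint and prints a few of the fields that appear in the record; it assumes network access and that the response layout matches the envelope shown here (the work metadata nested under `"message"`).

```python
# Minimal sketch: fetch and inspect the Crossref work record shown above.
# Assumes the public Crossref REST endpoint api.crossref.org/works/{DOI}
# and the "message" envelope seen in the record.
import json
import urllib.request

DOI = "10.1007/978-3-031-73113-6_18"  # DOI of the chapter record above

with urllib.request.urlopen(f"https://api.crossref.org/works/{DOI}") as resp:
    record = json.load(resp)

work = record["message"]  # the work metadata sits under "message"
authors = ", ".join(f'{a["given"]} {a["family"]}' for a in work.get("author", []))

print(work["title"][0])           # chapter title (Crossref stores titles as a list)
print(authors)                    # author names as deposited
print(work["DOI"], work["type"])  # identifier and work type ("book-chapter")
print(work["references-count"])   # number of deposited references (106 in this record)
```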