{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T09:27:27Z","timestamp":1771234047292,"version":"3.50.1"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T00:00:00Z","timestamp":1729296000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T00:00:00Z","timestamp":1729296000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s11263-024-02263-9","type":"journal-article","created":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T20:30:43Z","timestamp":1729369843000},"page":"1727-1754","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["On the Generalization and Causal Explanation in Self-Supervised Learning"],"prefix":"10.1007","volume":"133","author":[{"given":"Wenwen","family":"Qiang","sequence":"first","affiliation":[]},{"given":"Zeen","family":"Song","sequence":"additional","affiliation":[]},{"given":"Ziyin","family":"Gu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3376-1522","authenticated-orcid":false,"given":"Jiangmeng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Changwen","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Fuchun","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Hui","family":"Xiong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,19]]},"reference":[{"key":"2263_CR1","unstructured":"Arora, S., Khandeparkar, H., Khodak, M., Plevrakis, O., & Saunshi, N. (2019). A theoretical analysis of contrastive unsupervised representation learning., arXiv preprint arXiv:1902.09229."},{"key":"2263_CR2","unstructured":"Ash, J. T., Goel, S., Krishnamurthy, A., & Misra, D. (2021). Investigating the role of negatives in contrastive representation learning. arXiv preprint arXiv:2106.09943."},{"key":"2263_CR3","doi-asserted-by":"crossref","unstructured":"Assran, M., Duval, Q., Misra, I., Bojanowski, P., Vincent, P., Rabbat, M., LeCun, Y., & Ballas, N. (2023). Self-supervised learning from images with a joint-embedding predictive architecture. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 15619\u201315629).","DOI":"10.1109\/CVPR52729.2023.01499"},{"key":"2263_CR4","unstructured":"Awasthi, P., Dikkala, N., & Kamath, P. (2022). Do more negative samples necessarily hurt in contrastive learning? arXiv preprint arXiv:2205.01789."},{"key":"2263_CR5","unstructured":"Bai, J., Liu, Z., Wang, H., Hao, J., Feng, Y., Chu, H., & Haoji, H. (2023). On the effectiveness of out-of-distribution data in self-supervised long-tail learning., arXiv preprint arXiv:2306.04934."},{"key":"2263_CR6","unstructured":"Bardes, A., Ponce, J., & LeCun, Y. (2022). Vicregl: Self-supervised learning of local visual features. arXiv preprint arXiv:2210.01571."},{"key":"2263_CR7","unstructured":"Caron, M., Misra, I., Mairal, J., Goyal, P., Bojanowski, P., & Joulin, A. (2020). Unsupervised learning of visual features by contrasting cluster assignments. CoRR, abs\/2006.09882."},{"issue":"1","key":"2263_CR8","first-page":"4907","volume":"23","author":"KHR Chan","year":"2022","unstructured":"Chan, K. H. R., Yaodong, Yu., You, C., Qi, H., Wright, J., & Ma, Y. (2022). Redunet: A white-box deep network from the principle of maximizing rate reduction. The Journal of Machine Learning Research, 23(1), 4907\u20135009.","journal-title":"The Journal of Machine Learning Research"},{"key":"2263_CR9","doi-asserted-by":"crossref","unstructured":"Chen, X., & He, K. (2021). Exploring simple siamese representation learning. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (pp. 15750\u201315758).","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"2263_CR10","unstructured":"Chen, X., Fan, H., Girshick, R., & He, K. (2020). Improved baselines with momentum contrastive learning., arXiv preprint arXiv:2003.04297."},{"key":"2263_CR11","unstructured":"Chen, T., Kornblith, S., Norouzi, M., & Hinton, G. (2020). A simple framework for contrastive learning of visual representations. In International conference on machine learning (pp. 1597\u20131607) PMLR."},{"key":"2263_CR12","unstructured":"Chen, S., Niu, G., Gong, C., Li, J., Yang, J., & Sugiyama, M. (2021). Large-margin contrastive learning with distance polarization regularizer. In International conference on machine learning (pp. 1673\u20131683). PMLR."},{"key":"2263_CR13","doi-asserted-by":"crossref","unstructured":"Chen, X., Xie, S., & He, K. (2021). An empirical study of training self-supervised vision transformers. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 9640\u20139649).","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"2263_CR14","unstructured":"Coates, A., Ng, A., & Lee, H. (2011). An analysis of single-layer networks in unsupervised feature learning. In Proceedings of the fourteenth international conference on artificial intelligence and statistics (pp. 215\u2013223). JMLR Workshop and Conference Proceedings."},{"key":"2263_CR15","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jia, M., Lin, T.-Y., Song, Y., & Belongie, S. (2019). Class-balanced loss based on effective number of samples. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9268\u20139277).","DOI":"10.1109\/CVPR.2019.00949"},{"key":"2263_CR16","unstructured":"Darmois, G. (1951). Analyse des liaisons de probabilit\u00e9. In Proceedings of International Statics Conferences 1947 (pp. 231)."},{"key":"2263_CR17","unstructured":"Ermolov, A., Siarohin, A., Sangineto, E., & Sebe, N. (2021). Whitening for self-supervised representation learning. In International Conference on Machine Learning (pp. 3015\u20133024). PMLR."},{"issue":"2","key":"2263_CR18","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K. I., Winn, J., & Zisserman, A. (2010). The pascal visual object classes (voc) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"2263_CR19","unstructured":"G\u00e1lvez, B.\u00a0R., Blaas, A., Rodr\u00edguez, P., Golinski, A., Suau, X., Ramapuram, J., Busbridge, D., & Zappella, L. (2023). The role of entropy and reconstruction in multi-view self-supervised learning. In International Conference on Machine Learning (pp. 29143\u201329160). PMLR."},{"key":"2263_CR20","unstructured":"Garrido, Q., Chen, Y., Bardes, A., Najman, L., & LeCun, Y. (2022). On the duality between contrastive and non-contrastive self-supervised learning. In The Eleventh International Conference on Learning Representations."},{"key":"2263_CR21","unstructured":"Grill, J.-B., Strub, F., Altch\u00e9, F., Tallec, C., Richemond, P. H., chatskaya, E., Doersch, C., Pires, B.\u00a0A., Guo, Z.\u00a0D., & Azar, Mohammad,\u00a0G. et\u00a0al. (2020). Bootstrap your own latent: A new approach to self-supervised learning. arXiv preprint arXiv:2006.07733,."},{"key":"2263_CR22","first-page":"21271","volume":"33","author":"J-B Grill","year":"2020","unstructured":"Grill, J.-B., Strub, F., Altch\u00e9, F., Tallec, C., Richemond, P., Buchatskaya, E., Doersch, C., Pires, B. A., Guo, Z., Azar, M. G., et al. (2020). Bootstrap your own latent-a new approach to self-supervised learning. Advances in Neural Information Processing Systems, 33, 21271\u201321284.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2263_CR23","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Yuxin, W., Xie, S., & Girshick, R. (2020). Momentum contrast for unsupervised visual representation learning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9729\u20139738).","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2263_CR24","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask r-cnn. In Proceedings of the IEEE international conference on computer vision (pp. 2961\u20132969).","DOI":"10.1109\/ICCV.2017.322"},{"key":"2263_CR25","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Basart, S., Norman, M., Kadavath, S., Wang, F., Dorundo, E., Desai, R., Zhu, T., Parajuli, S., Guo, M., et al. (2021). The many faces of robustness: A critical analysis of out-of-distribution generalization. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 8340\u20138349).","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"2263_CR26","unstructured":"Hendrycks, D., Mazeika, M., & Dietterich, T. (2018). Deep anomaly detection with outlier exposure., arXiv preprint arXiv:1812.04606."},{"key":"2263_CR27","unstructured":"Hjelm, R. D., Fedorov, A., Lavoie-Marchildon, S., Grewal, K., Bachman, P., Trischler, A., & Bengio, Y. (2018). Learning deep representations by mutual information estimation and maximization. arXiv preprint arXiv:1808.06670."},{"key":"2263_CR28","unstructured":"Hu, T., Zhili, L., Zhou, F., Wang, W., & Huang, W. (2022). Your contrastive learning is secretly doing stochastic neighbor embedding. In The Eleventh international conference on learning representations."},{"key":"2263_CR29","unstructured":"Hyvarinen, A. J., & Morioka, H. (2017). Nonlinear ica of temporally dependent stationary sources. In 20th international conference on artificial intelligence and statistics."},{"issue":"3","key":"2263_CR30","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1016\/S0893-6080(98)00140-3","volume":"12","author":"A Hyv\u00e4rinen","year":"1999","unstructured":"Hyv\u00e4rinen, A., & Pajunen, P. (1999). Nonlinear independent component analysis: Existence and uniqueness results. Neural Networks, 12(3), 429\u2013439.","journal-title":"Neural Networks"},{"issue":"1","key":"2263_CR31","doi-asserted-by":"crossref","first-page":"2","DOI":"10.3390\/technologies9010002","volume":"9","author":"A Jaiswal","year":"2020","unstructured":"Jaiswal, A., Babu, A. R., Zadeh, M. Z., Banerjee, D., & Makedon, F. (2020). A survey on contrastive self-supervised learning. Technologies, 9(1), 2.","journal-title":"Technologies"},{"key":"2263_CR32","unstructured":"Jiang, Z., Chen, T., Mortazavi, B.\u00a0J., Wang, Z. (2021). Self-damaging contrastive learning. In International conference on machine learning (pp. 4927\u20134939). PMLR."},{"key":"2263_CR33","unstructured":"Krizhevsky, A., Hinton, G., et al. (2009). Learning multiple layers of features from tiny images."},{"key":"2263_CR34","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems, 25, 1097\u20131105.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2263_CR35","unstructured":"Le, Y. & Yang, X. (2015) Tiny imagenet visual recognition challenge. CS 231N 7(7):3"},{"key":"2263_CR36","unstructured":"Li, Y., Pogodin, R., Sutherland, D.\u00a0J., & Gretton, A. (2021). Self-supervised learning with kernel dependence maximization. arXiv preprint arXiv:2106.08320."},{"key":"2263_CR37","unstructured":"Li, J., Qiang, W., Zheng, C., Bing, S., & Xiong, H. (2022). Metaug: Contrastive learning via meta feature augmentation. International Conference on Machine Learning,."},{"key":"2263_CR38","unstructured":"Li, J., Zhou, P., & Xiong, C. (2020). Prototypical contrastive learning of unsupervised representations., arXiv preprint arXiv:2005.04966."},{"key":"2263_CR39","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C.\u00a0L. (2014). Microsoft coco: Common objects in context. In European conference on computer vision (pp. 740\u2013755). Springer.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2263_CR40","first-page":"34091","volume":"35","author":"X Liu","year":"2022","unstructured":"Liu, X., Wang, Z., Li, Y.-L., & Wang, S. (2022). Self-supervised learning via maximum entropy coding. Advances in Neural Information Processing Systems, 35, 34091\u201334105.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2263_CR41","unstructured":"Novak, R., Bahri, Y., Abolafia, D. A., Pennington, J., & Sohl-Dickstein, J. (2018). Sensitivity and generalization in neural networks: an empirical study. International Conference on Learning Representations,"},{"key":"2263_CR42","first-page":"5784","volume":"34","author":"K Nozawa","year":"2021","unstructured":"Nozawa, K., & Sato, I. (2021). Understanding negative samples in instance discriminative self-supervised representation learning. Advances in Neural Information Processing Systems, 34, 5784\u20135797.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2263_CR43","first-page":"35240","volume":"35","author":"S Ozsoy","year":"2022","unstructured":"Ozsoy, S., Hamdan, S., Arik, S., Yuret, D., & Erdogan, A. (2022). Self-supervised learning with an information maximization criterion. Advances in Neural Information Processing Systems, 35, 35240\u201335253.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2263_CR44","unstructured":"Qiang, W., Li, J., Zheng, C., Bing, S., & Xiong, H. (2022). Interventional contrastive learning with meta semantic regularizer. In International conference on machine learning (pp. 18018\u201318030). PMLR."},{"key":"2263_CR45","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al. (2021). Learning transferable visual models from natural language supervision. In International Conference on Machine Learning (pp. 8748\u20138763) PMLR."},{"key":"2263_CR46","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J.. (2015). Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in Neural Information Processing Systems,28."},{"key":"2263_CR47","unstructured":"Rudin, W. (2006). Real and complex analysis. Tata McGraw-hill education."},{"key":"2263_CR48","doi-asserted-by":"crossref","unstructured":"Ruzhansky, M., Cho, Y. J., Agarwal, P., Area, I., et\u00a0al. (2017). Advances in real and complex analysis with applications. Springer.","DOI":"10.1007\/978-981-10-4337-6"},{"key":"2263_CR49","doi-asserted-by":"crossref","first-page":"727","DOI":"10.1016\/j.neucom.2022.06.069","volume":"501","author":"L Si","year":"2022","unstructured":"Si, L., Qiang, W., Li, J., Fanjiang, X., & Sun, F. (2022). Multi-view representation learning from local consistency and global alignment. Neurocomputing, 501, 727\u2013740.","journal-title":"Neurocomputing"},{"key":"2263_CR50","unstructured":"Simon, J.\u00a0B., Knutins, M., Ziyin, L., Geisz, D., Fetterman, A.\u00a0J., & Albrecht, J. (2023). On the stepwise nature of self-supervised learning."},{"key":"2263_CR51","doi-asserted-by":"crossref","unstructured":"Spivak, M. (2018). Calculus on manifolds: A modern approach to classical theorems of advanced calculus. CRC Press,.","DOI":"10.1201\/9780429501906"},{"key":"2263_CR52","unstructured":"Stephenson, C., Padhy, S., Ganesh, A., Hui, Y., Tang, H., & Chung, S. Y. (2021). On the geometry of generalization and memorization in deep neural networks., arXiv preprint arXiv:2105.14602."},{"key":"2263_CR53","doi-asserted-by":"crossref","unstructured":"Tao, C., Zhu, X., Weijie, S., Huang, G., Li, B., Jie Z., Yu, Q., Xiaogang, W., & Dai, J. (2023). Siamese image modeling for self-supervised vision representation learning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 2132\u20132141).","DOI":"10.1109\/CVPR52729.2023.00212"},{"key":"2263_CR54","doi-asserted-by":"crossref","unstructured":"Tian, Y., Krishnan, D., & Isola, P. (2020). Contrastive multiview coding. In European conference on computer vision (pp. 776\u2013794). Springer.","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"2263_CR55","doi-asserted-by":"crossref","unstructured":"Tian, Y., Krishnan, D., & Isola, P. (2020). Contrastive Multiview Coding.","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"2263_CR56","unstructured":"Tomasev, N., Bica, I., cWilliams, L., Buesing, M., Pascanu, R., Blundell, C., & Mitrovic, J. (2022). Pushing the limits of self-supervised resnets: Can we outperform supervised learning without labels on imagenet? arXiv preprint arXiv:2201.05119."},{"key":"2263_CR57","unstructured":"van den Oord, A., Li, Y., & Vinyals, O. (2018). Representation learning with contrastive predictive coding., arXiv preprint arXiv:1807.03748."},{"key":"2263_CR58","first-page":"16451","volume":"34","author":"J Von K\u00fcgelgen","year":"2021","unstructured":"Von K\u00fcgelgen, J., Sharma, Y., Gresele, L., Brendel, W., Sch\u00f6lkopf, B., Besserve, M., & Locatello, F. (2021). Self-supervised learning with data augmentations provably isolates content from style. Advances in Neural Information Processing Systems, 34, 16451\u201316467.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2263_CR59","unstructured":"Wang, T., & Isola, P. (2020). Understanding contrastive representation learning through alignment and uniformity on the hypersphere. In International conference on machine learning (pp. 9929\u20139939). PMLR."},{"key":"2263_CR60","unstructured":"Wu, Y., Kirillov, A., Massa, F., Lo, W.-Y., & Girshick, R. (2019). Detectron2. https:\/\/github.com\/facebookresearch\/detectron2."},{"key":"2263_CR61","unstructured":"Zbontar, J., Jing, L., Misra, I., LeCun, Y., & Deny, S. (2021). Barlow twins: Self-supervised learning via redundancy reduction., arXiv preprint arXiv:2103.03230."},{"issue":"3","key":"2263_CR62","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/3446776","volume":"64","author":"C Zhang","year":"2021","unstructured":"Zhang, C., Bengio, S., Hardt, M., Recht, B., & Vinyals, O. (2021). Understanding deep learning (still) requires rethinking generalization. Communications of the ACM, 64(3), 107\u2013115.","journal-title":"Communications of the ACM"},{"key":"2263_CR63","first-page":"2543","volume":"34","author":"M Zheng","year":"2021","unstructured":"Zheng, M., You, S., Wang, F., Qian, C., Zhang, C., Wang, X., & Chang, X. (2021). Ressl: Relational self-supervised learning with weak augmentation. Advances in Neural Information Processing Systems, 34, 2543\u20132555.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2263_CR64","unstructured":"Zhou, Z., Yao, J., Wang, Y.-F., Han, B.,\u00a0 & Zhang, Y.. (2022). Contrastive learning with boosted memorization. In International Conference on Machine Learning (pp. 27367\u201327377). PMLR."},{"key":"2263_CR65","unstructured":"Zimmermann, R. S., Sharma, Y., Schneider, S., Bethge, M., & Brendel, W. (2021). Contrastive learning inverts the data generating process. In International conference on machine learning (pp. 12979\u201312990). PMLR."},{"key":"2263_CR66","unstructured":"Zimmermann, R. S., Sharma, Y., Schneider, S., Bethge, M., & Brendel, W. (2021). Contrastive learning inverts the data generating process."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02263-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02263-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02263-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T22:14:11Z","timestamp":1743372851000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02263-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,19]]},"references-count":66,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["2263"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02263-9","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,19]]},"assertion":[{"value":"11 September 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 October 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}