{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T09:25:54Z","timestamp":1768814754901,"version":"3.49.0"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729423","type":"print"},{"value":"9783031729430","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72943-0_3","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:43:16Z","timestamp":1732801396000},"page":"41-58","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Textual Knowledge Matters: Cross-Modality Co-teaching for\u00a0Generalized Visual Class Discovery"],"prefix":"10.1007","author":[{"given":"Haiyang","family":"Zheng","sequence":"first","affiliation":[]},{"given":"Nan","family":"Pu","sequence":"additional","affiliation":[]},{"given":"Wenjing","family":"Li","sequence":"additional","affiliation":[]},{"given":"Nicu","family":"Sebe","sequence":"additional","affiliation":[]},{"given":"Zhun","family":"Zhong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"3_CR1","doi-asserted-by":"publisher","unstructured":"Assran, M., et al.: Masked Siamese networks for label-efficient learning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. ECCV 2022. LNCS, vol. 13691, pp. 456\u2013473. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19821-2_26","DOI":"10.1007\/978-3-031-19821-2_26"},{"key":"3_CR2","unstructured":"Berrios, W., Mittal, G., Thrush, T., Kiela, D., Singh, A.: Towards language models that can see: computer vision through the lens of natural language. arXiv preprint arXiv:2306.16410 (2023)"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Blum, A., Mitchell, T.: Combining labeled and unlabeled data with co-training. In: Proceedings of the Eleventh Annual Conference on Computational Learning Theory, pp. 92\u2013100 (1998)","DOI":"10.1145\/279943.279962"},{"key":"3_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"446","DOI":"10.1007\/978-3-319-10599-4_29","volume-title":"Computer Vision \u2013 ECCV 2014","author":"L Bossard","year":"2014","unstructured":"Bossard, L., Guillaumin, M., Van Gool, L.: Food-101 \u2013 mining discriminative components with random forests. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 446\u2013461. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_29"},{"key":"3_CR5","unstructured":"Brown, T., et\u00a0al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems (2020)"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed, S., Vedaldi, A.: Describing textures in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.461"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"3_CR9","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16 $$\\times $$ 16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Fini, E., Sangineto, E., Lathuili\u00e8re, S., Zhong, Z., Nabi, M., Ricci, E.: A unified objective for novel class discovery. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00915"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Gupta, A., Dollar, P., Girshick, R.: LVIS: a dataset for large vocabulary instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.00550"},{"key":"3_CR12","unstructured":"Han, B., et al.: Co-teaching: robust training of deep neural networks with extremely noisy labels. In: Advances in Neural Information Processing Systems (2018)"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Han, K., Rebuffi, S.A., Ehrhardt, S., Vedaldi, A., Zisserman, A.: Autonovel: automatically discovering and learning novel visual categories. IEEE Trans. Pattern Anal. Mach. Intell. (2021)","DOI":"10.1109\/TPAMI.2021.3091944"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Han, K., Vedaldi, A., Zisserman, A.: Learning to discover novel visual categories via deep transfer clustering. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00849"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., Fei-Fei, L.: 3D object representations for fine-grained categorization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Krishna, R., et\u00a0al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vis. (2017)","DOI":"10.1007\/s11263-016-0981-7"},{"key":"3_CR17","unstructured":"Krizhevsky, A., Hinton, G., et\u00a0al.: Learning multiple layers of features from tiny images. Technical report (2009)"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Kuznetsova, A., et\u00a0al.: The open images dataset v4: unified image classification, object detection, and visual relationship detection at scale. Int. J. Comput. Vis. (2020)","DOI":"10.1007\/s11263-020-01316-z"},{"key":"3_CR19","unstructured":"Li, F.F., Andreeto, M., Ranzato, M., Perona, P.: Caltech 101 (2022)"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Li, Y., Fan, H., Hu, R., Feichtenhofer, C., He, K.: Scaling language-image pre-training via masking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.02240"},{"key":"3_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"3_CR22","unstructured":"MacQueen, J., et\u00a0al.: Some methods for classification and analysis of multivariate observations. In: Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability (1967)"},{"key":"3_CR23","unstructured":"Menon, S., Vondrick, C.: Visual classification via description from large language models. arXiv preprint arXiv:2210.07183 (2022)"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Nilsback, M.E., Zisserman, A.: Automated flower classification over a large number of classes. In: 2008 Sixth Indian Conference on Computer Vision, Graphics & Image Processing (2008)","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"3_CR25","unstructured":"Novack, Z., McAuley, J., Lipton, Z.C., Garg, S.: Chils: zero-shot image classification with hierarchical label sets. In: International Conference on Machine Learning. PMLR (2023)"},{"key":"3_CR26","unstructured":"Ouldnoughi, R., Kuo, C.W., Kira, Z.: CLIP-GCD: simple language guided generalized category discovery. arXiv preprint arXiv:2305.10420 (2023)"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Parkhi, O.M., Vedaldi, A., Zisserman, A., Jawahar, C.: Cats and dogs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2012)","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"3_CR28","doi-asserted-by":"crossref","unstructured":"Pu, N., Zhong, Z., Sebe, N.: Dynamic conceptional contrastive learning for generalized category discovery. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00732"},{"key":"3_CR29","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning (2021)"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Roy, S., Krivosheev, E., Zhong, Z., Sebe, N., Ricci, E.: Curriculum graph co-teaching for multi-target domain adaptation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00531"},{"key":"3_CR31","doi-asserted-by":"publisher","unstructured":"Roy, S., Liu, M., Zhong, Z., Sebe, N., Ricci, E.: Class-incremental novel class discovery. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. ECCV 2022. LNCS, vol. 13693, pp. 317\u2013333. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19827-4_19","DOI":"10.1007\/978-3-031-19827-4_19"},{"key":"3_CR32","doi-asserted-by":"crossref","unstructured":"Russakovsky, O., et\u00a0al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vis. (2015)","DOI":"10.1007\/s11263-015-0816-y"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Sloutsky, V.M.: From perceptual categories to concepts: what develops? Cognit. Sci. (2010)","DOI":"10.1111\/j.1551-6709.2010.01129.x"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Vaze, S., Han, K., Vedaldi, A., Zisserman, A.: Generalized category discovery. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00734"},{"key":"3_CR35","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., Belongie, S.: The caltech-ucsd birds-200-2011 dataset. Computation & Neural Systems Technical report (2011)"},{"key":"3_CR36","unstructured":"Wang, H., Vaze, S., Han, K.: Sptnet: an efficient alternative framework for generalized category discovery with spatial prompt tuning. In: The Twelfth International Conference on Learning Representations (2023)"},{"key":"3_CR37","doi-asserted-by":"crossref","unstructured":"Wen, X., Zhao, B., Qi, X.: Parametric classification for generalized category discovery: a baseline study. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.01521"},{"key":"3_CR38","doi-asserted-by":"crossref","unstructured":"Xiao, J., Hays, J., Ehinger, K.A., Oliva, A., Torralba, A.: Sun database: large-scale scene recognition from abbey to zoo. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2010)","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Yang, F., et al.: Asymmetric co-teaching for unsupervised cross-domain person re-identification. In: Proceedings of the AAAI Conference on Artificial Intelligence (2020)","DOI":"10.1609\/aaai.v34i07.6950"},{"key":"3_CR40","doi-asserted-by":"crossref","unstructured":"Yang, Y., Panagopoulou, A., Zhou, S., Jin, D., Callison-Burch, C., Yatskar, M.: Language in a bottle: language model guided concept bottlenecks for interpretable image classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.01839"},{"key":"3_CR41","unstructured":"Yu, J., Wang, Z., Vasudevan, V., Yeung, L., Seyedhosseini, M., Wu, Y.: Coca: contrastive captioners are image-text foundation models. arXiv preprint arXiv:2205.01917 (2022)"},{"key":"3_CR42","unstructured":"Yuan, Y., Chen, C.S., Liu, Z., Neiswanger, W., Liu, X.S.: Importance-aware co-teaching for offline model-based optimization. Adv. Neural Inf. Process. Syst. (2024)"},{"key":"3_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, S., Khan, S., Shen, Z., Naseer, M., Chen, G., Khan, F.S.: Promptcal: contrastive affinity learning via auxiliary prompts for generalized novel category discovery. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00339"},{"key":"3_CR44","doi-asserted-by":"crossref","unstructured":"Zhao, B., Wen, X., Han, K.: Learning semi-supervised gaussian mixture models for generalized category discovery. arXiv preprint arXiv:2305.06144 (2023)","DOI":"10.1109\/ICCV51070.2023.01524"},{"key":"3_CR45","doi-asserted-by":"crossref","unstructured":"Zhong, Z., Fini, E., Roy, S., Luo, Z., Ricci, E., Sebe, N.: Neighborhood contrastive learning for novel class discovery. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.01072"},{"key":"3_CR46","doi-asserted-by":"crossref","unstructured":"Zhong, Z., Zhu, L., Luo, Z., Li, S., Yang, Y., Sebe, N.: Openmix: reviving known knowledge for discovering novel visual categories in an open world. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00934"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72943-0_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T14:14:53Z","timestamp":1732803293000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72943-0_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9783031729423","9783031729430"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72943-0_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}