{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T19:04:20Z","timestamp":1762110260984,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":72,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031727740"},{"type":"electronic","value":"9783031727757"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72775-7_9","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:01:50Z","timestamp":1727593310000},"page":"142-161","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["AlignZeg: Mitigating Objective Misalignment for\u00a0Zero-Shot Semantic Segmentation"],"prefix":"10.1007","author":[{"given":"Jiannan","family":"Ge","sequence":"first","affiliation":[]},{"given":"Lingxi","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Hongtao","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Pandeng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xiaopeng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yongdong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Tian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","unstructured":"Atzmon, Y., Chechik, G.: Adaptive confidence smoothing for generalized zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11671\u201311680 (2019)","DOI":"10.1109\/CVPR.2019.01194"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Baek, D., Oh, Y., Ham, B.: Exploiting a joint embedding space for generalized zero-shot semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9536\u20139545 (2021)","DOI":"10.1109\/ICCV48922.2021.00940"},{"key":"9_CR3","unstructured":"Bucher, M., Vu, T.H., Cord, M., P\u00e9rez, P.: Zero-shot semantic segmentation. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Caesar, H., Uijlings, J., Ferrari, V.: COCO-Stuff: thing and stuff classes in context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1209\u20131218 (2018)","DOI":"10.1109\/CVPR.2018.00132"},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Cai, K., et al.: MixReorg: cross-modal mixed patch reorganization is a good mask learner for open-world semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1196\u20131205 (2023)","DOI":"10.1109\/ICCV51070.2023.00116"},{"key":"9_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Chen, J., et al.: Exploring open-vocabulary semantic segmentation from clip vision encoder distillation only. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 699\u2013710 (2023)","DOI":"10.1109\/ICCV51070.2023.00071"},{"key":"9_CR8","doi-asserted-by":"publisher","unstructured":"Chen, P., et al.: Point-to-box network for accurate object detection via single point supervision. In: Avidan, S., Brostow, G., Cisse, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. LNCS, vol. 13669, pp. 51\u201367. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_4","DOI":"10.1007\/978-3-031-20077-9_4"},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Chen, S., et al.: TransZero: attribute-guided transformer for zero-shot learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a036, pp. 330\u2013338 (2022)","DOI":"10.1609\/aaai.v36i1.19909"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Chen, S., et al.: Free: feature refinement for generalized zero-shot learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 122\u2013131 (2021)","DOI":"10.1109\/ICCV48922.2021.00019"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: Semantics disentangling for generalized zero-shot learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8712\u20138720 (2021)","DOI":"10.1109\/ICCV48922.2021.00859"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"9_CR13","first-page":"17864","volume":"34","author":"B Cheng","year":"2021","unstructured":"Cheng, B., Schwing, A., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. Adv. Neural. Inf. Process. Syst. 34, 17864\u201317875 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Cheng, J., Nandi, S., Natarajan, P., Abd-Almageed, W.: SIGN: spatial-information incorporated generative network for generalized zero-shot semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9556\u20139566 (2021)","DOI":"10.1109\/ICCV48922.2021.00942"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Cho, S., et al.: CAT-Seg: cost aggregation for open-vocabulary semantic segmentation. arXiv preprint arXiv:2303.11797 (2023)","DOI":"10.1109\/CVPR52733.2024.00394"},{"key":"9_CR16","unstructured":"Chou, Y.Y., Lin, H.T., Liu, T.L.: Adaptive and generative zero-shot learning. In: International Conference on Learning Representations (2020)"},{"key":"9_CR17","unstructured":"Deng, R., et\u00a0al.: Segment Anything Model (SAM) for digital pathology: assess zero-shot segmentation on whole slide imaging. arXiv preprint arXiv:2304.04155 (2023)"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Ding, J., Xue, N., Xia, G.S., Dai, D.: Decoupling zero-shot semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11583\u201311592 (2022)","DOI":"10.1109\/CVPR52688.2022.01129"},{"key":"9_CR19","unstructured":"Everingham, M., Winn, J.: The PASCAL visual object classes challenge 2012 (VOC2012) development kit. Pattern Anal. Stat. Model. Comput. Learn., Tech. Rep 2007(1\u201345), 5 (2012)"},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Ge, J., Xie, H., Li, P., Xie, L., Min, S., Zhang, Y.: Towards discriminative feature generation for generalized zero-shot learning. IEEE Trans. Multimedia (2024)","DOI":"10.1109\/TMM.2024.3408048"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Ge, J., Xie, H., Min, S., Li, P., Zhang, Y.: Dual part discovery network for zero-shot learning. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 3244\u20133252 (2022)","DOI":"10.1145\/3503161.3547889"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Gu, Z., Zhou, S., Niu, L., Zhao, Z., Zhang, L.: Context-aware feature generation for zero-shot semantic segmentation. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1921\u20131929 (2020)","DOI":"10.1145\/3394171.3413593"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Han, C., Zhong, Y., Li, D., Han, K., Ma, L.: Open-vocabulary semantic segmentation with decoupled one-pass network. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1086\u20131096 (2023)","DOI":"10.1109\/ICCV51070.2023.00106"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Han, Z., Fu, Z., Chen, S., Yang, J.: Contrastive embedding for generalized zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2371\u20132381 (2021)","DOI":"10.1109\/CVPR46437.2021.00240"},{"key":"9_CR25","doi-asserted-by":"crossref","unstructured":"He, S., Ding, H., Jiang, W.: Primitive generation and semantic-related alignment for universal zero-shot segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11238\u201311247 (2023)","DOI":"10.1109\/CVPR52729.2023.01081"},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"He, S., Ding, H., Jiang, W.: Semantic-promoted debiasing and background disambiguation for zero-shot instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19498\u201319507 (2023)","DOI":"10.1109\/CVPR52729.2023.01868"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Huo, X., Xie, L., Hu, H., Zhou, W., Li, H., Tian, Q.: Domain-agnostic prior for transfer semantic segmentation. In: Proceedings of the IEEE\/CVF conference on Computer Vision and Pattern Recognition, pp. 7075\u20137085 (2022)","DOI":"10.1109\/CVPR52688.2022.00694"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Huynh, D., Elhamifar, E.: Fine-grained generalized zero-shot learning via dense attribute-based attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4483\u20134493 (2020)","DOI":"10.1109\/CVPR42600.2020.00454"},{"key":"9_CR29","unstructured":"Jayaraman, D., Grauman, K.: Zero-shot recognition with unreliable attributes. In: Advances in Neural Information Processing Systems, vol. 27 (2014)"},{"key":"9_CR30","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"key":"9_CR31","doi-asserted-by":"publisher","unstructured":"Jia, M., et al.: Visual prompt tuning. In: Avidan, S., Brostow, G., Cisse, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. LNCS, vol. 13693, pp. 709\u2013727. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19827-4_41","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"9_CR32","unstructured":"Jiao, S., Wei, Y., Wang, Y., Zhao, Y., Shi, H.: Learning mask-aware clip representations for zero-shot segmentation. In: Thirty-Seventh Conference on Neural Information Processing Systems (2023)"},{"key":"9_CR33","doi-asserted-by":"crossref","unstructured":"Jin, Z., et al.: Mining contextual information beyond image for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7231\u20137241 (2021)","DOI":"10.1109\/ICCV48922.2021.00714"},{"key":"9_CR34","first-page":"21798","volume":"33","author":"Y Kalantidis","year":"2020","unstructured":"Kalantidis, Y., Sariyildiz, M.B., Pion, N., Weinzaepfel, P., Larlus, D.: Hard negative mixing for contrastive learning. Adv. Neural. Inf. Process. Syst. 33, 21798\u201321809 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Karazija, L., Laina, I., Vedaldi, A., Rupprecht, C.: Diffusion models for zero-shot open-vocabulary segmentation. arXiv preprint arXiv:2306.09316 (2023)","DOI":"10.1007\/978-3-031-72652-1_18"},{"key":"9_CR36","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"9_CR37","unstructured":"Kwon, G., Al\u00a0Regib, G.: A gating model for bias calibration in generalized zero-shot learning. IEEE Trans. Image Process. (2022)"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Li, J., Chen, P., Qian, S., Jia, J.: TagClip: improving discrimination ability of open-vocabulary semantic segmentation. arXiv preprint arXiv:2304.07547 (2023)","DOI":"10.1109\/TPAMI.2024.3454647"},{"key":"9_CR39","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C.J., Chang, K.W.: VisualBERT: a simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)"},{"key":"9_CR40","unstructured":"Li, P., et al.: MomentDiff: generative video moment retrieval from random to real. In: Advances in Neural Information Processing Systems, pp. 65948\u201365966 (2023)"},{"key":"9_CR41","doi-asserted-by":"crossref","unstructured":"Li, P., et al.: Progressive spatio-temporal prototype matching for text-video retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4100\u20134110 (2023)","DOI":"10.1109\/ICCV51070.2023.00379"},{"key":"9_CR42","doi-asserted-by":"publisher","first-page":"5909","DOI":"10.1109\/TIP.2022.3203612","volume":"31","author":"P Li","year":"2022","unstructured":"Li, P., Xie, H., Min, S., Ge, J., Chen, X., Zhang, Y.: Deep Fourier ranking quantization for semi-supervised image retrieval. Trans. Image Process. 31, 5909\u20135922 (2022)","journal-title":"Trans. Image Process."},{"key":"9_CR43","first-page":"10317","volume":"33","author":"P Li","year":"2020","unstructured":"Li, P., Wei, Y., Yang, Y.: Consistent structural relation learning for zero-shot segmentation. Adv. Neural. Inf. Process. Syst. 33, 10317\u201310327 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR44","doi-asserted-by":"crossref","unstructured":"Liang, F., et al.: Open-vocabulary semantic segmentation with mask-adapted clip. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7061\u20137070 (2023)","DOI":"10.1109\/CVPR52729.2023.00682"},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Liu, J., Bao, Y., Xie, G.S., Xiong, H., Sonke, J.J., Gavves, E.: Dynamic prototype convolution network for few-shot semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11553\u201311562 (2022)","DOI":"10.1109\/CVPR52688.2022.01126"},{"key":"9_CR46","doi-asserted-by":"crossref","unstructured":"Liu, M., Li, F., Zhang, C., Wei, Y., Bai, H., Zhao, Y.: Progressive semantic-visual mutual adaption for generalized zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15337\u201315346 (2023)","DOI":"10.1109\/CVPR52729.2023.01472"},{"key":"9_CR47","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: Delving into shape-aware zero-shot semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2999\u20133009 (2023)","DOI":"10.1109\/CVPR52729.2023.00293"},{"key":"9_CR48","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Goal-oriented gaze estimation for zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3794\u20133803 (2021)","DOI":"10.1109\/CVPR46437.2021.00379"},{"key":"9_CR49","unstructured":"Luo, H., Bao, J., Wu, Y., He, X., Li, T.: SegClip: patch aggregation with learnable centers for open-vocabulary semantic segmentation. In: International Conference on Machine Learning, pp. 23033\u201323044. PMLR (2023)"},{"key":"9_CR50","unstructured":"Van\u00a0der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9(11) (2008)"},{"key":"9_CR51","doi-asserted-by":"crossref","unstructured":"Min, S., Yao, H., Xie, H., Wang, C., Zha, Z.J., Zhang, Y.: Domain-aware visual bias eliminating for generalized zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12664\u201312673 (2020)","DOI":"10.1109\/CVPR42600.2020.01268"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Mottaghi, R., et al.: The role of context for object detection and semantic segmentation in the wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 891\u2013898 (2014)","DOI":"10.1109\/CVPR.2014.119"},{"key":"9_CR53","doi-asserted-by":"crossref","unstructured":"Pastore, G., Cermelli, F., Xian, Y., Mancini, M., Akata, Z., Caputo, B.: A closer look at self-training for zero-label semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2693\u20132702 (2021)","DOI":"10.1109\/CVPRW53098.2021.00303"},{"key":"9_CR54","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"9_CR55","doi-asserted-by":"crossref","unstructured":"Rao, Y., Chen, G., Lu, J., Zhou, J.: Counterfactual attention learning for fine-grained visual categorization and re-identification. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1025\u20131034 (2021)","DOI":"10.1109\/ICCV48922.2021.00106"},{"key":"9_CR56","first-page":"24474","volume":"35","author":"A Roy","year":"2022","unstructured":"Roy, A., Shah, A., Shah, K., Dhar, P., Cherian, A., Chellappa, R.: FeLMi: few shot learning with hard Mixup. Adv. Neural. Inf. Process. Syst. 35, 24474\u201324486 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR57","unstructured":"Socher, R., Ganjoo, M., Manning, C.D., Ng, A.: Zero-shot learning through cross-modal transfer. In: Advances in Neural Information Processing Systems, vol. 26 (2013)"},{"key":"9_CR58","doi-asserted-by":"crossref","unstructured":"Su, H., Li, J., Chen, Z., Zhu, L., Lu, K.: Distinguishing unseen from seen for generalized zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7885\u20137894 (2022)","DOI":"10.1109\/CVPR52688.2022.00773"},{"key":"9_CR59","unstructured":"Su, W., et al.: VL-BERT: pre-training of generic visual-linguistic representations. arXiv preprint arXiv:1908.08530 (2019)"},{"key":"9_CR60","unstructured":"Verma, V., et al.: Manifold mixup: better representations by interpolating hidden states. In: International Conference on Machine Learning, pp. 6438\u20136447. PMLR (2019)"},{"key":"9_CR61","doi-asserted-by":"crossref","unstructured":"Wu, W., Zhao, Y., Shou, M.Z., Zhou, H., Shen, C.: DiffuMask: synthesizing images with pixel-level annotations for semantic segmentation using diffusion models. arXiv preprint arXiv:2303.11681 (2023)","DOI":"10.1109\/ICCV51070.2023.00117"},{"key":"9_CR62","doi-asserted-by":"crossref","unstructured":"Xian, Y., Choudhury, S., He, Y., Schiele, B., Akata, Z.: Semantic projection network for zero-and few-label semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8256\u20138265 (2019)","DOI":"10.1109\/CVPR.2019.00845"},{"key":"9_CR63","doi-asserted-by":"crossref","unstructured":"Xu, M., Zhang, Z., Wei, F., Hu, H., Bai, X.: Side adapter network for open-vocabulary semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2945\u20132954 (2023)","DOI":"10.1109\/CVPR52729.2023.00288"},{"key":"9_CR64","doi-asserted-by":"publisher","unstructured":"Xu, M., et al.: A simple baseline for open-vocabulary semantic segmentation with pre-trained vision-language model. In: Avidan, S., Brostow, G., Cisse, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. ECCV 2022. LNCS, vol. 13689, pp. 736\u2013753. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19818-2_42","DOI":"10.1007\/978-3-031-19818-2_42"},{"key":"9_CR65","first-page":"21969","volume":"33","author":"W Xu","year":"2020","unstructured":"Xu, W., Xian, Y., Wang, J., Schiele, B., Akata, Z.: Attribute prototype network for zero-shot learning. Adv. Neural. Inf. Process. Syst. 33, 21969\u201321980 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR66","doi-asserted-by":"crossref","unstructured":"Xu, W., Xian, Y., Wang, J., Schiele, B., Akata, Z.: VGSE: visually-grounded semantic embeddings for zero-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9316\u20139325 (2022)","DOI":"10.1109\/CVPR52688.2022.00910"},{"key":"9_CR67","doi-asserted-by":"crossref","unstructured":"Yue, Z., Wang, T., Sun, Q., Hua, X.S., Zhang, H.: Counterfactual zero-shot and open-set visual recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15404\u201315414 (2021)","DOI":"10.1109\/CVPR46437.2021.01515"},{"key":"9_CR68","unstructured":"Zhang, H., Cisse, M., Dauphin, Y.N., Lopez-Paz, D.: Mixup: beyond empirical risk minimization. arXiv preprint arXiv:1710.09412 (2017)"},{"key":"9_CR69","doi-asserted-by":"crossref","unstructured":"Zhang, H., Ding, H.: Prototypical matching and open set rejection for zero-shot semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6974\u20136983 (2021)","DOI":"10.1109\/ICCV48922.2021.00689"},{"key":"9_CR70","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Wu, J., Qin, Y., Zhang, F., Cui, L.: Zero-shot instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2593\u20132602 (2021)","DOI":"10.1109\/CVPR46437.2021.00262"},{"key":"9_CR71","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ADE20K dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 633\u2013641 (2017)","DOI":"10.1109\/CVPR.2017.544"},{"key":"9_CR72","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Lei, Y., Zhang, B., Liu, L., Liu, Y.: ZegCLIP: towards adapting clip for zero-shot semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11175\u201311185 (2023)","DOI":"10.1109\/CVPR52729.2023.01075"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72775-7_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T21:21:20Z","timestamp":1732828880000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72775-7_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031727740","9783031727757"],"references-count":72,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72775-7_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}