{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:27:31Z","timestamp":1778257651665,"version":"3.51.4"},"reference-count":85,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T00:00:00Z","timestamp":1746748800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T00:00:00Z","timestamp":1746748800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s11263-025-02442-2","type":"journal-article","created":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T15:42:54Z","timestamp":1746805374000},"page":"5569-5588","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["CLIMS++: Cross Language Image Matching with Automatic Context Discovery for Weakly Supervised Semantic Segmentation"],"prefix":"10.1007","volume":"133","author":[{"given":"Jinheng","family":"Xie","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Songhe","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xianxu","family":"Hou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaochuan","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1420-0815","authenticated-orcid":false,"given":"Linlin","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yawen","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yefeng","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mike Zheng","family":"Shou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,9]]},"reference":[{"key":"2442_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, J., & Kwak, S. (2018). Learning pixel-level semantic affinity with image-level supervision for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 4981\u20134990)","DOI":"10.1109\/CVPR.2018.00523"},{"key":"2442_CR2","doi-asserted-by":"crossref","unstructured":"Ahn, J., Cho, S., & Kwak, S. (2019). Weakly supervised learning of instance segmentation with inter-pixel relations. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 2209\u20132218)","DOI":"10.1109\/CVPR.2019.00231"},{"key":"2442_CR3","doi-asserted-by":"crossref","unstructured":"Araslanov, N., & Roth, S. (2020). Single-stage semantic segmentation from image labels. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 4253\u20134262)","DOI":"10.1109\/CVPR42600.2020.00431"},{"key":"2442_CR4","doi-asserted-by":"crossref","unstructured":"Bearman, A. L., Russakovsky, O., Ferrari, V., & Li, F. (2016) What\u2019s the point: Semantic segmentation with point supervision. In Proceeding of the European Conference on Computer Vision, (pp. 549\u2013565)","DOI":"10.1007\/978-3-319-46478-7_34"},{"key":"2442_CR5","unstructured":"Bird, S., Klein, E., & Loper, E. (2009). Natural language processing with Python: analyzing text with the natural language toolkit. O\u2019Reilly Media, Inc."},{"key":"2442_CR6","doi-asserted-by":"crossref","unstructured":"Chang, Y., Wang, Q., Hung, W., Piramuthu, R., Ai, Y., & Yang, M. (2020). Weakly-supervised semantic segmentation via sub-category exploration. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 8988\u20138997)","DOI":"10.1109\/CVPR42600.2020.00901"},{"key":"2442_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Z., & Sun, Q. (2023). Extracting class activation maps from non-discriminative features as well. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 3135\u20133144)","DOI":"10.1109\/CVPR52729.2023.00306"},{"key":"2442_CR8","unstructured":"Chen, L., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A. L. (2015). Semantic image segmentation with deep convolutional nets and fully connected CRFS. In ICLR"},{"key":"2442_CR9","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L Chen","year":"2018","unstructured":"Chen, L., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A. L. (2018). Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFS. TPAMI, 40, 834\u2013848.","journal-title":"TPAMI"},{"key":"2442_CR10","doi-asserted-by":"crossref","unstructured":"Chen, L., Wu, W., Fu, C., Han, X., & Zhang, Y. (2020). Weakly supervised semantic segmentation with boundary exploration. In Proceeding of the European Conference on Computer Vision, (pp. 347\u2013362)","DOI":"10.1007\/978-3-030-58574-7_21"},{"key":"2442_CR11","doi-asserted-by":"crossref","unstructured":"Chen, Q., Yang, L., Lai, J. H., & Xie, X. (2022a). Self-supervised image-specific prototype exploration for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 4288\u20134298)","DOI":"10.1109\/CVPR52688.2022.00425"},{"key":"2442_CR12","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wang, T., Wu, X., Hua, X. S., Zhan, H., & Sun, Q. (2022b). Class re-activation maps for weakly-supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 969\u2013978)","DOI":"10.1109\/CVPR52688.2022.00104"},{"key":"2442_CR13","doi-asserted-by":"crossref","unstructured":"Chen, L., Lei, C., Li, R., Li, S., Zhang, Z., & Zhang, L. (2023a). Fpr: False positive rectification for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1108\u20131118","DOI":"10.1109\/ICCV51070.2023.00108"},{"key":"2442_CR14","unstructured":"Chen, T., Mai, Z., Li, R., & Chao, W. l. (2023b). Segment anything model (SAM) enhanced pseudo labels for weakly supervised semantic segmentation. arXiv preprint arXiv:2305.05803"},{"key":"2442_CR15","doi-asserted-by":"crossref","unstructured":"Cheng, Z., Qiao, P., Li, K., Li, S., Wei, P., Ji, X., Yuan, L., Liu, C. and Chen, J., (2023). Out-of-candidate rectification for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 23673\u201323684)","DOI":"10.1109\/CVPR52729.2023.02267"},{"key":"2442_CR16","doi-asserted-by":"crossref","unstructured":"Dai, J., He, K., & Sun, J. (2015). Boxsup: Exploiting bounding boxes to supervise convolutional networks for semantic segmentation. In ICCV, pp. 1635\u20131643","DOI":"10.1109\/ICCV.2015.191"},{"key":"2442_CR17","doi-asserted-by":"crossref","unstructured":"Deng, S., Zhuo, W., Xie, J., & Shen, L. (2023). Qa-clims: Question-answer cross language image matching for weakly supervised semantic segmentation. In Proceedings of the 31st ACM International Conference on Multimedia, (pp. 5572\u20135583)","DOI":"10.1145\/3581783.3612148"},{"key":"2442_CR18","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S. & Uszkoreit, J., (2020). An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"2442_CR19","doi-asserted-by":"crossref","unstructured":"Du, Y., Fu, Z., Liu, Q., & Wang, Y. (2022). Weakly supervised semantic segmentation by pixel-to-prototype contrast. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 4320\u20134329)","DOI":"10.1109\/CVPR52688.2022.00428"},{"key":"2442_CR20","unstructured":"Everingham, M., Van\u00a0Gool, L., Williams, C. K. I., Winn, J., & Zisserman, A. (2012). The PASCAL Visual Object Classes Challenge 2012 (VOC2012) Results. http:\/\/www.pascal-network.org\/challenges\/VOC\/voc2012\/workshop\/index.html"},{"key":"2442_CR21","doi-asserted-by":"crossref","unstructured":"Gao, W., Wan, F., Pan, X., Peng, Z., Tian, Q., Han, Z., Zhou, B., & Ye, Q. (2021). Ts-cam: Token semantic coupled attention map for weakly supervised object localization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, (pp. 2886\u20132895)","DOI":"10.1109\/ICCV48922.2021.00288"},{"key":"2442_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 770\u2013778)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2442_CR23","first-page":"547","volume":"31","author":"Q Hou","year":"2018","unstructured":"Hou, Q., Jiang, P., Wei, Y., & Cheng, M. M. (2018). Self-erasing network for integral object attention. Advances in neural information processing systems, 31, 547\u2013557.","journal-title":"Advances in neural information processing systems"},{"key":"2442_CR24","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, X., Wang, J., Liu, W., & Wang, J. (2018). Weakly-supervised semantic segmentation network with deep seeded region growing. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. (pp. 7014\u20137023)","DOI":"10.1109\/CVPR.2018.00733"},{"key":"2442_CR25","doi-asserted-by":"crossref","unstructured":"Jang, S., Yun, J., Kwon, J., Lee, E., & Kim, Y. (2024). Dial: Dense image-text alignment for weakly supervised semantic segmentation. arXiv preprint arXiv:2409.15801","DOI":"10.1007\/978-3-031-72890-7_15"},{"key":"2442_CR26","unstructured":"Jiang, P. T., & Yang, Y. (2023). Segment anything is a good pseudo-label generator for weakly supervised semantic segmentation. arXiv preprint arXiv:2305.01275"},{"key":"2442_CR27","doi-asserted-by":"crossref","unstructured":"Jiang, P. T., Hou, Q., Cao, Y., Cheng, M. M., Wei, Y. & Xiong, H. K., (2019). Integral object mining via online attention accumulation. In ICCV, (pp. 2070\u20132079)","DOI":"10.1109\/ICCV.2019.00216"},{"key":"2442_CR28","doi-asserted-by":"crossref","unstructured":"Jiang, P. T., Yang, Y., Hou, Q., & Wei, Y. (2022). L2g: A simple local-to-global knowledge transfer framework for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF conference on Computer Vision and Pattern Recognition, pp. (16886\u201316896)","DOI":"10.1109\/CVPR52688.2022.01638"},{"key":"2442_CR29","doi-asserted-by":"crossref","unstructured":"Jo, S., & Yu, I. J. (2021). Puzzle-cam: Improved localization via matching partial and full features. In IEEE International Conference on Image Processing, (pp. 639\u2013643)","DOI":"10.1109\/ICIP42928.2021.9506058"},{"key":"2442_CR30","doi-asserted-by":"crossref","unstructured":"Jo, S., Yu, I. J., & Kim, K. (2023). Mars: Model-agnostic biased object removal without additional supervision for weakly-supervised semantic segmentation. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, (pp. 614\u2013623)","DOI":"10.1109\/ICCV51070.2023.00063"},{"key":"2442_CR31","doi-asserted-by":"crossref","unstructured":"Jo, S., Pan, F., Yu, I. J. & Kim, K., (2024). Dhr: Dual features-driven hierarchical rebalancing in inter- and intra-class regions for weakly-supervised semantic segmentation. In European Conference on Computer Vision (ECCV)","DOI":"10.1007\/978-3-031-73004-7_14"},{"key":"2442_CR32","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Mintun, E., Ravi, N., Mao, H., Rolland, C., Gustafson, L., Xiao, T., Whitehead, S., Berg, A. C., Lo, W. Y. & Doll\u00e1r, P., (2023). Segment anything. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, (pp. 4015\u20134026)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"2442_CR33","first-page":"695","volume-title":"Proceeding of the european conference on computer vision","author":"A Kolesnikov","year":"2016","unstructured":"Kolesnikov, A., & Lampert, C. H. (2016). Seed, expand and constrain: Three principles for weakly-supervised image segmentation. Proceeding of the european conference on computer vision (pp. 695\u2013711). Cham: Springer."},{"key":"2442_CR34","first-page":"109","volume":"24","author":"P Kr\u00e4henb\u00fchl","year":"2011","unstructured":"Kr\u00e4henb\u00fchl, P., & Koltun, V. (2011). Efficient inference in fully connected CRFS with gaussian edge potentials. Advances in neural information processing systems, 24, 109\u2013117.","journal-title":"Advances in neural information processing systems"},{"key":"2442_CR35","doi-asserted-by":"crossref","unstructured":"Kweon, H., & Yoon, K. J. (2024). From SAM to cams: Exploring segment anything model for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 19499\u201319509)","DOI":"10.1109\/CVPR52733.2024.01844"},{"key":"2442_CR36","doi-asserted-by":"crossref","unstructured":"Kweon, H., Yoon, S. H., Kim, H., Park, D. & Yoon, K. J., (2021). Unlocking the potential of ordinary classifier: Class-specific adversarial erasing framework for weakly supervised semantic segmentation. In: ICCV, (pp. 6994\u20137003)","DOI":"10.1109\/ICCV48922.2021.00691"},{"key":"2442_CR37","doi-asserted-by":"crossref","unstructured":"Lee, J., Kim, E., Lee, S., Lee, J., & Yoon, S. (2019). Ficklenet: Weakly and semi-supervised semantic image segmentation using stochastic inference. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 5267\u20135276)","DOI":"10.1109\/CVPR.2019.00541"},{"key":"2442_CR38","doi-asserted-by":"crossref","unstructured":"Lee, J., Kim, E., & Yoon, S. (2021a). Anti-adversarially manipulated attributions for weakly and semi-supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 4071\u20134080)","DOI":"10.1109\/CVPR46437.2021.00406"},{"key":"2442_CR39","doi-asserted-by":"crossref","unstructured":"Lee, J., Yi, J., Shin, C., & Yoon, S. (2021b). Bbam: Bounding box attribution map for weakly supervised semantic and instance segmentation. In Proceedings of the IEEE\/CVF conference on Computer Vision and Pattern Recognition, (pp. 2643\u20132652)","DOI":"10.1109\/CVPR46437.2021.00267"},{"key":"2442_CR40","doi-asserted-by":"crossref","unstructured":"Lee, S., Lee, M., Lee, J., & Shim, H. (2021c). Railroad is not a train: Saliency as pseudo-pixel supervision for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 5495\u20135505)","DOI":"10.1109\/CVPR46437.2021.00545"},{"key":"2442_CR41","doi-asserted-by":"crossref","unstructured":"Lee, J., Oh, S. J., Yun, S., Choe, J., Kim, E., & Yoon, S. (2022a). Weakly supervised semantic segmentation using out-of-distribution data. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 16897\u201316906)","DOI":"10.1109\/CVPR52688.2022.01639"},{"key":"2442_CR42","doi-asserted-by":"crossref","unstructured":"Lee, M., Kim, D., & Shim, H. (2022b). Threshold matters in WSSS: Manipulating the activation for the robust and accurate segmentation model against thresholds. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, (pp. 4330\u20134339)","DOI":"10.1109\/CVPR52688.2022.00429"},{"key":"2442_CR43","unstructured":"Li, B., Weinberger, K. Q., Belongie, S., Koltun, V., & Ranftl, R. (2022a). Language-driven semantic segmentation. arXiv preprint arXiv:2201.03546"},{"key":"2442_CR44","doi-asserted-by":"crossref","unstructured":"Liang, F., Wu, B., Dai, X., Li, K., Zhao, Y., Zhang, H., Zhang, P., Vajda, P., & Marculescu, D. (2023). Open-vocabulary semantic segmentation with mask-adapted clip. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 7061\u20137070)","DOI":"10.1109\/CVPR52729.2023.00682"},{"key":"2442_CR45","doi-asserted-by":"publisher","first-page":"1447","DOI":"10.1609\/aaai.v36i2.20034","volume":"36","author":"Y Li","year":"2022","unstructured":"Li, Y., Duan, Y., Kuang, Z., Chen, Y., Zhang, W., & Li, X. (2022). Uncertainty estimation via response scaling for pseudo-mask noise mitigation in weakly-supervised semantic segmentation. Proceedings of the AAAI Conference on Artificial Intelligence, 36, 1447\u20131455.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"2442_CR46","first-page":"16037","volume":"35","author":"J Li","year":"2022","unstructured":"Li, J., Jie, Z., Wang, X., Wei, X., & Ma, L. (2022). Expansion and shrinkage of localization for weakly-supervised semantic segmentation. Advances in Neural Information Processing Systems, 35, 16037\u201316051.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2442_CR47","doi-asserted-by":"crossref","unstructured":"Lin, Y., Chen, M., Wang, W., Wu, B., Li, K., Lin, B., Liu, H., & He, X. (2023). Clip is also an efficient segmenter: A text-driven approach for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 15305\u201315314)","DOI":"10.1109\/CVPR52729.2023.01469"},{"key":"2442_CR48","doi-asserted-by":"crossref","unstructured":"Lin, D., Dai, J., Jia, J., He, K., & Sun, J. (2016). Scribblesup: Scribble-supervised convolutional networks for semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 3159\u20133167)","DOI":"10.1109\/CVPR.2016.344"},{"key":"2442_CR49","first-page":"740","volume-title":"Proceeding of the European Conference on Computer Vision","author":"TY Lin","year":"2014","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. L. (2014). Microsoft coco: Common objects in context. Proceeding of the European Conference on Computer Vision (pp. 740\u2013755). Cham: Springer."},{"issue":"3","key":"2442_CR50","doi-asserted-by":"publisher","first-page":"1415","DOI":"10.1109\/TPAMI.2020.3023152","volume":"44","author":"Y Liu","year":"2020","unstructured":"Liu, Y., Wu, Y. H., Wen, P., Shi, Y., Qiu, Y., & Cheng, M. M. (2020). Leveraging instance-, image-and dataset-level information for weakly supervised instance segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(3), 1415\u20131428.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2442_CR51","first-page":"38","volume-title":"European Conference on Computer Vision","author":"S Liu","year":"2025","unstructured":"Liu, S., Zeng, Z., Ren, T., Li, F., Zhang, H., Yang, J., Jiang, Q., Li, C., Yang, J., Su, H., et al. (2025). Grounding DINO: Marrying DINO with grounded pre-training for open-set object detection. European Conference on Computer Vision (pp. 38\u201355). Cham: Springer."},{"key":"2442_CR52","doi-asserted-by":"crossref","unstructured":"Papandreou, G., Chen, L. C., Murphy, K.P. , & Yuille, A. L. (2015). Weakly- and nsemi-supervised learning of a deep convolutional network for semantic image segmentation. In ICCV, (pp. 1742\u20131750)","DOI":"10.1109\/ICCV.2015.203"},{"key":"2442_CR53","doi-asserted-by":"crossref","unstructured":"Pathak, D., Kr\u00e4henb\u00fchl, P., & Darrell, T. (2015). Constrained convolutional neural networks for weakly supervised segmentation. In: ICCV, (pp. 1796\u20131804)","DOI":"10.1109\/ICCV.2015.209"},{"key":"2442_CR54","doi-asserted-by":"crossref","unstructured":"Peng, Z., Wang, G., Xie, L., Jiang, D., Shen, W., & Tian, Q. (2023). Usage: A unified seed area generation paradigm for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, (pp. 624\u2013634)","DOI":"10.1109\/ICCV51070.2023.00064"},{"key":"2442_CR55","doi-asserted-by":"crossref","unstructured":"Pinheiro, P. H. O., & Collobert, R. (2015). From image-level to pixel-level labeling with convolutional networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 1713\u20131721)","DOI":"10.1109\/CVPR.2015.7298780"},{"key":"2442_CR56","unstructured":"Radford, A., Kim, J. W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J. & Krueger, G. (2021). Learning transferable visual models from natural language supervision. In ICML, (pp. 8748\u20138763)"},{"key":"2442_CR57","doi-asserted-by":"crossref","unstructured":"Ru, L., Du, B., & Wu, C. (2021). Learning visual words for weakly-supervised semantic segmentation. In IJCAI","DOI":"10.24963\/ijcai.2021\/136"},{"key":"2442_CR58","doi-asserted-by":"crossref","unstructured":"Ru, L., Zhan, Y., Yu, B., & Du, B. (2022). Learning affinity from attention: End-to-end weakly-supervised semantic segmentation with transformers. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 16846\u201316855)","DOI":"10.1109\/CVPR52688.2022.01634"},{"key":"2442_CR59","doi-asserted-by":"crossref","unstructured":"Ru, L., Zheng, H., Zhan, Y., & Du, B. (2023). Token contrast for weakly-supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 3093\u20133102)","DOI":"10.1109\/CVPR52729.2023.00302"},{"key":"2442_CR60","doi-asserted-by":"crossref","unstructured":"Su, Y., Sun, R., Lin, G., & Wu, Q. (2021). Context decoupling augmentation for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, (pp. 7004\u20137014)","DOI":"10.1109\/ICCV48922.2021.00692"},{"key":"2442_CR61","doi-asserted-by":"crossref","unstructured":"Su, G., Wang, W., Dai, J., & Gool, L. V. (2020). Mining cross-image semantics for weakly supervised semantic segmentation. In: Proceeding of the European Conference on Computer Vision, (pp. 347\u2013365)","DOI":"10.1007\/978-3-030-58536-5_21"},{"key":"2442_CR62","unstructured":"Sun W, Liu Z, Zhang Y, Zhong Y, & Barnes N (2023) An alternative to WSSS? an empirical study of the segment anything model (SAM) on weakly-supervised semantic segmentation problems. arXiv preprint arXiv:2305.01586"},{"key":"2442_CR63","unstructured":"Van\u00a0der Maaten, L., & Hinton, G. (2008). Visualizing data using t-SNE. Journal of Machine Learning Research 9(11)"},{"key":"2442_CR64","doi-asserted-by":"crossref","unstructured":"Vernaza, P., & Chandraker, M. (2017). Learning random-walk label propagation for weakly-supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 7158\u20137166)","DOI":"10.1109\/CVPR.2017.315"},{"key":"2442_CR65","doi-asserted-by":"publisher","first-page":"1736","DOI":"10.1007\/s11263-020-01293-3","volume":"128","author":"X Wang","year":"2020","unstructured":"Wang, X., Liu, S., Ma, H., & Yang, M. (2020a). Weakly-supervised semantic segmentation by iterative affinity learning. International Journal of Computer Vision, 128, 1736\u20131749.","journal-title":"International Journal of Computer Vision"},{"key":"2442_CR66","doi-asserted-by":"crossref","unstructured":"Wang, Y., Zhang, J., Kan, M., Shan, S., & Chen, X. (2020b). Self-supervised equivariant attention mechanism for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. (12275\u201312284)","DOI":"10.1109\/CVPR42600.2020.01229"},{"key":"2442_CR67","doi-asserted-by":"crossref","unstructured":"Wang, Z., Lu, Y., Li, Q., Tao, X., Guo, Y., Gong, M., & Liu, T. (2022a). Cris: Clip-driven referring image segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR52688.2022.01139"},{"key":"2442_CR68","unstructured":"Wang, P., Yang, A., Men, R., Lin, J., Bai, S., Li, Z., Ma, J., Zhou, C., Zhou, J., & Yang, H. (2022b). Ofa: Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework. In International Conference on Machine Learning, PMLR, (pp. 23318\u201323340)"},{"key":"2442_CR69","doi-asserted-by":"crossref","unstructured":"Wu, T., Huang, J., Gao, G., Wei, X., Wei, X., Luo, X., & Liu, C. H. (2021). Embedded discriminative attention mechanism for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 16765\u201316774)","DOI":"10.1109\/CVPR46437.2021.01649"},{"key":"2442_CR70","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/j.patcog.2019.01.006","volume":"90","author":"Z Wu","year":"2019","unstructured":"Wu, Z., Shen, C., & van den Hengel, A. (2019). Wider or deeper: Revisiting the resnet model for visual recognition. Pattern Recognition, 90, 119\u2013133.","journal-title":"Pattern Recognition"},{"key":"2442_CR71","doi-asserted-by":"crossref","unstructured":"Xie, J., Hou, X., Ye, K., & Shen, L. (2022a). Clims: Cross language image matching for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 4483\u20134492)","DOI":"10.1109\/CVPR52688.2022.00444"},{"key":"2442_CR72","doi-asserted-by":"crossref","unstructured":"Xie, J., Xiang, J., Chen, J., Hou, X., Zhao, X., & Shen, L. (2022b). C2am: Contrastive learning of class-agnostic activation map for weakly supervised object localization and semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 989\u2013998)","DOI":"10.1109\/CVPR52688.2022.00106"},{"key":"2442_CR73","doi-asserted-by":"crossref","unstructured":"Xu, J., De\u00a0Mello, S., Liu, S., Byeon, W., Breuel, T., Kautz, J., & Wang, X. (2022a). Groupvit: Semantic segmentation emerges from text supervision. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 18134\u201318144)","DOI":"10.1109\/CVPR52688.2022.01760"},{"key":"2442_CR74","doi-asserted-by":"crossref","unstructured":"Xu, L., Ouyang, W., Bennamoun, M., Boussaid, F., & Xu, D. (2022b). Multi-class token transformer for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 4310\u20134319)","DOI":"10.1109\/CVPR52688.2022.00427"},{"key":"2442_CR75","doi-asserted-by":"crossref","unstructured":"Xu, L., Ouyang, W., Bennamoun, M., Boussaid, F., & Xu, D. (2023). Learning multi-modal class-specific tokens for weakly supervised dense object localization. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 19596\u201319605)","DOI":"10.1109\/CVPR52729.2023.01877"},{"key":"2442_CR76","doi-asserted-by":"crossref","unstructured":"Xu, L., Ouyang, W., Bennamoun, M., Boussaid, F., Sohel, F., & Xu, D. (2021). Leveraging auxiliary tasks with affinity learning for weakly supervised semantic segmentation. In ICCV, (pp. 6984\u20136993)","DOI":"10.1109\/ICCV48922.2021.00690"},{"key":"2442_CR77","first-page":"736","volume-title":"European Conference on Computer Vision","author":"M Xu","year":"2022","unstructured":"Xu, M., Zhang, Z., Wei, F., Lin, Y., Cao, Y., Hu, H., & Bai, X. (2022). A simple baseline for open-vocabulary semantic segmentation with pre-trained vision-language model. European Conference on Computer Vision (pp. 736\u2013753). Cham: Springer."},{"key":"2442_CR78","doi-asserted-by":"crossref","unstructured":"Yang, X., & Gong, X. (2024). Foundation model assisted weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, (pp. 523\u2013532)","DOI":"10.1109\/WACV57701.2024.00058"},{"key":"2442_CR79","first-page":"393","volume-title":"European Conference on Computer Vision","author":"SH Yoon","year":"2025","unstructured":"Yoon, S. H., Kwon, H., Jeong, J., Park, D., & Yoon, K. J. (2025). Diffusion-guided weakly supervised semantic segmentation. European Conference on Computer Vision (pp. 393\u2013411). Cham: Springer."},{"issue":"11","key":"2442_CR80","doi-asserted-by":"publisher","first-page":"8082","DOI":"10.1109\/TPAMI.2021.3083269","volume":"44","author":"B Zhang","year":"2021","unstructured":"Zhang, B., Xiao, J., Jiao, J., Wei, Y., & Zhao, Y. (2021). Affinity attention graph neural network for weakly supervised semantic segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(11), 8082\u20138096.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2442_CR81","first-page":"655","volume":"33","author":"D Zhang","year":"2020","unstructured":"Zhang, D., Zhang, H., Tang, J., Hua, X., & Sun, Q. (2020). Causal intervention for weakly-supervised semantic segmentation. Advances in neural information processing systems, 33, 655\u2013666.","journal-title":"Advances in neural information processing systems"},{"key":"2442_CR82","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., & Torralba, A. (2016). Learning deep features for discriminative localization. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 2921\u20132929)","DOI":"10.1109\/CVPR.2016.319"},{"key":"2442_CR83","first-page":"696","volume-title":"European Conference on Computer Vision","author":"C Zhou","year":"2022","unstructured":"Zhou, C., Loy, C. C., & Dai, B. (2022a). Extract free dense labels from clip. European Conference on Computer Vision (pp. 696\u2013712). Cham: Springer."},{"key":"2442_CR84","doi-asserted-by":"crossref","unstructured":"Zhou, T., Zhang, M., & Zhao, F., Li, J. (2022b). Regional semantic contrast and aggregation for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (pp. 4299\u20134309)","DOI":"10.1109\/CVPR52688.2022.00426"},{"key":"2442_CR85","doi-asserted-by":"crossref","unstructured":"Zhu, L., Wang, X., Feng, J., Cheng, T., Li, Y., Jiang, B., Zhang, D., & Han, J. (2024). Weakclip: Adapting clip for weakly-supervised semantic segmentation. International Journal of Computer Vision. 1\u201321","DOI":"10.1007\/s11263-024-02224-2"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02442-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02442-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02442-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T14:31:52Z","timestamp":1757169112000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02442-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,9]]},"references-count":85,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["2442"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02442-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,9]]},"assertion":[{"value":"22 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}