{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T17:43:40Z","timestamp":1772905420303,"version":"3.50.1"},"publisher-location":"Cham","reference-count":79,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031727832","type":"print"},{"value":"9783031727849","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72784-9_18","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:01:50Z","timestamp":1727593310000},"page":"314-333","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["WPS-SAM: Towards Weakly-Supervised Part Segmentation with\u00a0Foundation Models"],"prefix":"10.1007","author":[{"given":"Xin-Jian","family":"Wu","sequence":"first","affiliation":[]},{"given":"Ruisong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Shijie","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Cheng-Lin","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, J., Kwak, S.: Learning pixel-level semantic affinity with image-level 
supervision for weakly supervised semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4981\u20134990 (2018)","DOI":"10.1109\/CVPR.2018.00523"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Araslanov, N., Roth, S.: Single-stage semantic segmentation from image labels. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4253\u20134262 (2020)","DOI":"10.1109\/CVPR42600.2020.00431"},{"issue":"2","key":"18_CR3","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1037\/0033-295X.94.2.115","volume":"94","author":"I Biederman","year":"1987","unstructured":"Biederman, I.: Recognition-by-components: a theory of human image understanding. Psychol. Rev. 94(2), 115 (1987)","journal-title":"Psychol. Rev."},{"key":"18_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"18_CR5","unstructured":"Chen, J., Yang, Z., Zhang, L.: Semantic segment anything (2023)"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Chen, K., et al.: Rsprompter: learning to prompt for remote sensing instance segmentation based on visual foundation model. arXiv preprint arXiv:2306.16269 (2023)","DOI":"10.1109\/TGRS.2024.3356074"},{"key":"18_CR7","unstructured":"Chen, L.C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. 
arXiv preprint arXiv:1706.05587 (2017)"},{"key":"18_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 833\u2013851. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49"},{"key":"18_CR9","unstructured":"Chen, T., Mai, Z., Li, R., Chao, W.l.: Segment anything model (SAM) enhanced pseudo labels for weakly supervised semantic segmentation. arXiv preprint arXiv:2305.05803 (2023)"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Chen, X., Mottaghi, R., Liu, X., Fidler, S., Urtasun, R., Yuille, A.: Detect what you can: Detecting and representing objects using holistic models and body parts. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1971\u20131978 (2014)","DOI":"10.1109\/CVPR.2014.254"},{"key":"18_CR11","first-page":"17864","volume":"34","author":"B Cheng","year":"2021","unstructured":"Cheng, B., Schwing, A., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. Adv. Neural. Inf. Process. Syst. 34, 17864\u201317875 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Dai, J., He, K., Sun, J.: Boxsup: exploiting bounding boxes to supervise convolutional networks for semantic segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 
1635\u20131643 (2015)","DOI":"10.1109\/ICCV.2015.191"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: A large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"18_CR14","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"18_CR15","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021)"},{"key":"18_CR16","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Du, Y., Fu, Z., Liu, Q., Wang, Y.: Weakly supervised semantic segmentation by pixel-to-prototype contrast. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4320\u20134329 (2022)","DOI":"10.1109\/CVPR52688.2022.00428"},{"key":"18_CR18","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vision 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Fan, J., Zhang, Z., Tan, T., Song, C., Xiao, J.: Cian: cross-image affinity net for weakly supervised semantic segmentation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 
10762\u201310769 (2020)","DOI":"10.1609\/aaai.v34i07.6705"},{"issue":"4","key":"18_CR20","doi-asserted-by":"publisher","first-page":"594","DOI":"10.1109\/TPAMI.2006.79","volume":"28","author":"L Fei-Fei","year":"2006","unstructured":"Fei-Fei, L., Fergus, R., Perona, P.: One-shot learning of object categories. IEEE Trans. Pattern Anal. Mach. Intell. 28(4), 594\u2013611 (2006)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"9","key":"18_CR21","doi-asserted-by":"publisher","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2009","unstructured":"Felzenszwalb, P.F., Girshick, R.B., McAllester, D., Ramanan, D.: Object detection with discriminatively trained part-based models. IEEE Trans. Pattern Anal. Mach. Intell. 32(9), 1627\u20131645 (2009)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR22","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1023\/B:VISI.0000042934.15159.49","volume":"61","author":"PF Felzenszwalb","year":"2005","unstructured":"Felzenszwalb, P.F., Huttenlocher, D.P.: Pictorial structures for object recognition. Int. J. Comput. Vision 61, 55\u201379 (2005)","journal-title":"Int. J. Comput. Vision"},{"issue":"1","key":"18_CR23","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1109\/T-C.1973.223602","volume":"100","author":"MA Fischler","year":"1973","unstructured":"Fischler, M.A., Elschlager, R.A.: The representation and matching of pictorial structures. IEEE Trans. Comput. 100(1), 67\u201392 (1973)","journal-title":"IEEE Trans. Comput."},{"key":"18_CR24","unstructured":"Girshick, R., Felzenszwalb, P., McAllester, D.: Object detection with grammar models. In: Advances in Neural Information Processing Systems, vol. 24 (2011)"},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"He, J., Chen, J., Lin, M.X., Yu, Q., Yuille, A.L.: Compositor: bottom-up clustering and compositing for robust part and object segmentation. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11259\u201311268 (2023)","DOI":"10.1109\/CVPR52729.2023.01083"},{"key":"18_CR26","series-title":"LNCS","first-page":"128","volume-title":"ECCV 2022","author":"J He","year":"2022","unstructured":"He, J., et al.: PartimageNet: a large, high-quality dataset of parts. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13668, pp. 128\u2013145. Springer, Cham (2022)"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"18_CR29","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Hung, W.C., Jampani, V., Liu, S., Molchanov, P., Yang, M.H., Kautz, J.: Scops: self-supervised co-part segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 869\u2013878 (2019)","DOI":"10.1109\/CVPR.2019.00096"},{"key":"18_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11633-023-1385-0","volume":"21","author":"W Ji","year":"2024","unstructured":"Ji, W., Li, J., Bi, Q., Liu, T., Li, W., Cheng, L.: Segment anything is not always perfect: an investigation of SAM on different real-world applications. Mach. Intell. Res. 21, 1\u201314 (2024)","journal-title":"Mach. Intell. 
Res."},{"key":"18_CR32","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"key":"18_CR33","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s11633-022-1378-4","volume":"20","author":"R Jiang","year":"2023","unstructured":"Jiang, R., Zhu, R., Su, H., Li, Y., Xie, Y., Zou, W.: Deep learning-based moving object segmentation: Recent progress and research prospects. Mach. Intell. Res. 20, 335\u2013369 (2023)","journal-title":"Mach. Intell. Res."},{"key":"18_CR34","doi-asserted-by":"crossref","unstructured":"Khoreva, A., Benenson, R., Hosang, J., Hein, M., Schiele, B.: Simple does it: weakly supervised instance and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 876\u2013885 (2017)","DOI":"10.1109\/CVPR.2017.181"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"18_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"290","DOI":"10.1007\/978-3-030-58583-9_18","volume-title":"Computer Vision \u2013 ECCV 2020","author":"V Kulharia","year":"2020","unstructured":"Kulharia, V., Chandra, S., Agrawal, A., Torr, P., Tyagi, A.: Box2Seg: attention weighted loss and discriminative feature learning for weakly supervised segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12372, pp. 290\u2013308. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58583-9_18"},{"key":"18_CR37","doi-asserted-by":"crossref","unstructured":"Kweon, H., Yoon, S.H., Kim, H., Park, D., Yoon, K.J.: Unlocking the potential of ordinary classifier: class-specific adversarial erasing framework for weakly supervised semantic segmentation. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6994\u20137003 (2021)","DOI":"10.1109\/ICCV48922.2021.00691"},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"Kweon, H., Yoon, S.H., Yoon, K.J.: Weakly supervised semantic segmentation via adversarial learning of classifier and reconstructor. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11329\u201311339 (2023)","DOI":"10.1109\/CVPR52729.2023.01090"},{"issue":"6266","key":"18_CR39","doi-asserted-by":"publisher","first-page":"1332","DOI":"10.1126\/science.aab3050","volume":"350","author":"BM Lake","year":"2015","unstructured":"Lake, B.M., Salakhutdinov, R., Tenenbaum, J.B.: Human-level concept learning through probabilistic program induction. Science 350(6266), 1332\u20131338 (2015)","journal-title":"Science"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Lee, J., Kim, E., Yoon, S.: Anti-adversarially manipulated attributions for weakly and semi-supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4071\u20134080 (2021)","DOI":"10.1109\/CVPR46437.2021.00406"},{"key":"18_CR41","doi-asserted-by":"crossref","unstructured":"Lee, S., Lee, M., Lee, J., Shim, H.: Railroad is not a train: saliency as pseudo-pixel supervision for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5495\u20135505 (2021)","DOI":"10.1109\/CVPR46437.2021.00545"},{"key":"18_CR42","unstructured":"Li, F., et al.: Semantic-SAM: segment and recognize anything at any granularity. arXiv preprint arXiv:2307.04767 (2023)"},{"key":"18_CR43","doi-asserted-by":"crossref","unstructured":"Li, J., Fan, J., Zhang, Z.: Towards noiseless object contours for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
16856\u201316865 (2022)","DOI":"10.1109\/CVPR52688.2022.01635"},{"key":"18_CR44","doi-asserted-by":"crossref","unstructured":"Li, R., He, C., Zhang, Y., Li, S., Chen, L., Zhang, L.: Sim: semantic-aware instance mask generation for box-supervised instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7193\u20137203 (2023)","DOI":"10.1109\/CVPR52729.2023.00695"},{"key":"18_CR45","doi-asserted-by":"crossref","unstructured":"Lin, Y., et al.: Clip is also an efficient segmenter: a text-driven approach for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15305\u201315314 (2023)","DOI":"10.1109\/CVPR52729.2023.01469"},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Liu, S., Zhang, L., Yang, X., Su, H., Zhu, J.: Unsupervised part segmentation through disentangling appearance and shape. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8355\u20138364 (2021)","DOI":"10.1109\/CVPR46437.2021.00825"},{"key":"18_CR47","unstructured":"Liu, Y., Zhu, M., Li, H., Chen, H., Wang, X., Shen, C.: Matcher: segment anything with one shot using all-purpose feature matching. arXiv preprint arXiv:2305.13310 (2023)"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Liu, Y., Zhang, J., She, Z., Kheradmand, A., Armand, M.: SAMM (segment any medical model): a 3D slicer integration to sam. arXiv preprint arXiv:2304.05622 (2023)","DOI":"10.1117\/12.3001069"},{"key":"18_CR49","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"18_CR50","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: SWIN transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"18_CR51","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"18_CR52","unstructured":"Oquab, M., et\u00a0al.: Dinov2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"18_CR53","doi-asserted-by":"crossref","unstructured":"Pan, T.Y., Liu, Q., Chao, W.L., Price, B.: Towards open-world segmentation of parts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15392\u201315401 (2023)","DOI":"10.1109\/CVPR52729.2023.01477"},{"key":"18_CR54","doi-asserted-by":"crossref","unstructured":"Pathak, D., Krahenbuhl, P., Darrell, T.: Constrained convolutional neural networks for weakly supervised segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1796\u20131804 (2015)","DOI":"10.1109\/ICCV.2015.209"},{"key":"18_CR55","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"18_CR56","doi-asserted-by":"crossref","unstructured":"Ramanathan, V., et\u00a0al.: Paco: Parts and attributes of common objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7141\u20137151 (2023)","DOI":"10.1109\/CVPR52729.2023.00690"},{"key":"18_CR57","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Cortes, C., Lawrence, N., Lee, D., Sugiyama, M., Garnett, R. (eds.) 
Advances in Neural Information Processing Systems, vol.\u00a028 (2015)"},{"key":"18_CR58","doi-asserted-by":"crossref","unstructured":"Rong, S., Tu, B., Wang, Z., Li, J.: Boundary-enhanced co-training for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19574\u201319584 (2023)","DOI":"10.1109\/CVPR52729.2023.01875"},{"key":"18_CR59","doi-asserted-by":"crossref","unstructured":"Ru, L., Zhan, Y., Yu, B., Du, B.: Learning affinity from attention: end-to-end weakly-supervised semantic segmentation with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16846\u201316855 (2022)","DOI":"10.1109\/CVPR52688.2022.01634"},{"key":"18_CR60","doi-asserted-by":"crossref","unstructured":"Song, C., Huang, Y., Ouyang, W., Wang, L.: Box-driven class-wise region masking and filling rate guided loss for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3136\u20133145 (2019)","DOI":"10.1109\/CVPR.2019.00325"},{"key":"18_CR61","doi-asserted-by":"crossref","unstructured":"Stewart, R., Andriluka, M., Ng, A.Y.: End-to-end people detection in crowded scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2325\u20132333 (2016)","DOI":"10.1109\/CVPR.2016.255"},{"key":"18_CR62","unstructured":"Sun, W., Liu, Z., Zhang, Y., Zhong, Y., Barnes, N.: An alternative to WSSS? An empirical study of the segment anything model (SAM) on weakly-supervised semantic segmentation problems. 
arXiv preprint arXiv:2305.01586 (2023)"},{"key":"18_CR63","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"524","DOI":"10.1007\/978-3-030-01270-0_31","volume-title":"Computer Vision \u2013 ECCV 2018","author":"M Tang","year":"2018","unstructured":"Tang, M., Perazzi, F., Djelouah, A., Ayed, I.B., Schroers, C., Boykov, Y.: On regularized losses for weakly-supervised CNN segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11220, pp. 524\u2013540. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01270-0_31"},{"key":"18_CR64","doi-asserted-by":"crossref","unstructured":"Thewlis, J., Bilen, H., Vedaldi, A.: Unsupervised learning of object landmarks by factorized spatial embeddings. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5916\u20135925 (2017)","DOI":"10.1109\/ICCV.2017.348"},{"key":"18_CR65","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, X., Cao, Y., Wang, W., Shen, C., Huang, T.: SEGGPT: segmenting everything in context. arXiv preprint arXiv:2304.03284 (2023)","DOI":"10.1109\/ICCV51070.2023.00110"},{"key":"18_CR66","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1007\/3-540-45054-8_2","volume-title":"Computer Vision - ECCV 2000","author":"M Weber","year":"2000","unstructured":"Weber, M., Welling, M., Perona, P.: Unsupervised learning of models for recognition. In: Vernon, D. (ed.) ECCV 2000, Part I. LNCS, vol. 1842, pp. 18\u201332. Springer, Heidelberg (2000). https:\/\/doi.org\/10.1007\/3-540-45054-8_2"},{"key":"18_CR67","unstructured":"Wei, M., Yue, X., Zhang, W., Kong, S., Liu, X., Pang, J.: OV-parts: towards open-vocabulary part segmentation. 
arXiv preprint arXiv:2310.05107 (2023)"},{"key":"18_CR68","doi-asserted-by":"crossref","unstructured":"Wei, Y., Feng, J., Liang, X., Cheng, M.M., Zhao, Y., Yan, S.: Object region mining with adversarial erasing: A simple classification to semantic segmentation approach. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1568\u20131576 (2017)","DOI":"10.1109\/CVPR.2017.687"},{"key":"18_CR69","unstructured":"Xie, C., Ren, D., Wang, L., Zuo, W.: Learning class-agnostic pseudo mask generation for box-supervised semantic segmentation. arXiv preprint arXiv:2103.05463 (2021)"},{"key":"18_CR70","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: SegFormer: simple and efficient design for semantic segmentation with transformers. Adv. Neural. Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR71","doi-asserted-by":"crossref","unstructured":"Xiong, Y., et\u00a0al.: EfficientSAM: leveraged masked image pretraining for efficient segment anything. arXiv preprint arXiv:2312.00863 (2023)","DOI":"10.1109\/CVPR52733.2024.01525"},{"key":"18_CR72","doi-asserted-by":"crossref","unstructured":"Yang, X., Gong, X.: Foundation model assisted weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 523\u2013532 (2024)","DOI":"10.1109\/WACV57701.2024.00058"},{"key":"18_CR73","doi-asserted-by":"publisher","first-page":"14413","DOI":"10.1109\/ACCESS.2020.2966647","volume":"8","author":"Q Yao","year":"2020","unstructured":"Yao, Q., Gong, X.: Saliency guided self-attention network for weakly and semi-supervised semantic segmentation. 
IEEE Access 8, 14413\u201314423 (2020)","journal-title":"IEEE Access"},{"key":"18_CR74","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/BF00127169","volume":"8","author":"AL Yuille","year":"1992","unstructured":"Yuille, A.L., Hallinan, P.W., Cohen, D.S.: Feature extraction from faces using deformable templates. Int. J. Comput. Vision 8, 99\u2013111 (1992)","journal-title":"Int. J. Comput. Vision"},{"key":"18_CR75","doi-asserted-by":"crossref","unstructured":"Zhang, B., Xiao, J., Wei, Y., Sun, M., Huang, K.: Reliability does matter: an end-to-end weakly supervised semantic segmentation approach. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 12765\u201312772 (2020)","DOI":"10.1609\/aaai.v34i07.6971"},{"key":"18_CR76","unstructured":"Zhao, X., et al.: Fast segment anything. arXiv preprint arXiv:2306.12156 (2023)"},{"key":"18_CR77","doi-asserted-by":"crossref","unstructured":"Zhou, T., Zhang, M., Zhao, F., Li, J.: Regional semantic contrast and aggregation for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4299\u20134309 (2022)","DOI":"10.1109\/CVPR52688.2022.00426"},{"key":"18_CR78","doi-asserted-by":"crossref","unstructured":"Zhu, S.C., Mumford, D., et\u00a0al.: A stochastic grammar of images. Found. Trends\u00ae Comput. Graphics Vis. 2(4), 259\u2013362 (2007)","DOI":"10.1561\/0600000018"},{"key":"18_CR79","unstructured":"Zou, X., et al.: Segment everything everywhere all at once. 
arXiv preprint arXiv:2304.06718 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72784-9_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:51:23Z","timestamp":1727596283000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72784-9_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031727832","9783031727849"],"references-count":79,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72784-9_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}