{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T04:16:10Z","timestamp":1773202570523,"version":"3.50.1"},"reference-count":95,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2023,8,12]],"date-time":"2023-08-12T00:00:00Z","timestamp":1691798400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,12]],"date-time":"2023-08-12T00:00:00Z","timestamp":1691798400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s11263-023-01862-2","type":"journal-article","created":{"date-parts":[[2023,8,12]],"date-time":"2023-08-12T11:02:07Z","timestamp":1691838127000},"page":"3252-3271","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["Toward Practical Weakly Supervised Semantic Segmentation via Point-Level Supervision"],"prefix":"10.1007","volume":"131","author":[{"given":"Junsong","family":"Fan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2648-3875","authenticated-orcid":false,"given":"Zhaoxiang","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,12]]},"reference":[{"key":"1862_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, J., Cho, S., & Kwak, S. (2019). Weakly supervised learning of instance segmentation with inter-pixel relations. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 2209\u20132218).","DOI":"10.1109\/CVPR.2019.00231"},{"key":"1862_CR2","doi-asserted-by":"crossref","unstructured":"Ahn, J. & Kwak, S. (2018). Learning pixel-level semantic affinity with image-level supervision for weakly supervised semantic segmentation. arXiv preprint arXiv:1803.10464.","DOI":"10.1109\/CVPR.2018.00523"},{"key":"1862_CR3","doi-asserted-by":"crossref","unstructured":"Araslanov, N. & Roth, S. (2020). Single-stage semantic segmentation from image labels. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4253\u20134262).","DOI":"10.1109\/CVPR42600.2020.00431"},{"key":"1862_CR4","doi-asserted-by":"crossref","unstructured":"Bearman, A., Russakovsky, O., Ferrari, V., & Fei-Fei, L. (2016). What\u2019s the point: Semantic segmentation with point supervision. In European conference on computer vision (pp. 549\u2013565). Springer.","DOI":"10.1007\/978-3-319-46478-7_34"},{"key":"1862_CR5","doi-asserted-by":"crossref","unstructured":"Chaudhry, A., Dokania, P.\u00a0K., & Torr, P.\u00a0H. (2017). Discovering class-specific pixels for weakly-supervised semantic segmentation. arXiv preprint arXiv:1707.05821.","DOI":"10.5244\/C.31.20"},{"key":"1862_CR6","unstructured":"Chen, L.-C., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A.\u00a0L. (2014). Semantic image segmentation with deep convolutional nets and fully connected CRFs. arXiv preprint arXiv:1412.7062."},{"issue":"4","key":"1862_CR7","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A. L. (2018). DeepLab: Semantic image segmentation with deep convolutional nets, Atrous convolution, and fully connected CRFs. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(4), 834\u2013848.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1862_CR8","unstructured":"Chen, L.-C., Papandreou, G., Schroff, F., & Adam, H. (2017). Rethinking Atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587."},{"key":"1862_CR9","doi-asserted-by":"crossref","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., & Adam, H. (2018b). Encoder-decoder with Atrous separable convolution for semantic image segmentation. In Proceedings of the European conference on computer vision (ECCV) (pp. 801\u2013818).","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"1862_CR10","unstructured":"Chen, T., Kornblith, S., Norouzi, M., & Hinton, G. (2020). A simple framework for contrastive learning of visual representations. arXiv preprint arXiv:2002.05709."},{"key":"1862_CR11","doi-asserted-by":"crossref","unstructured":"Chen, X., Yuan, Y., Zeng, G., & Wang, J. (2021). Semi-supervised semantic segmentation with cross pseudo supervision. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 2613\u20132622).","DOI":"10.1109\/CVPR46437.2021.00264"},{"key":"1862_CR12","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., & Schiele, B. (2016). The cityscapes dataset for semantic urban scene understanding. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3213\u20133223).","DOI":"10.1109\/CVPR.2016.350"},{"key":"1862_CR13","doi-asserted-by":"crossref","unstructured":"Dai, J., He, K., & Sun, J. (2015). BoxSup: Exploiting bounding boxes to supervise convolutional networks for semantic segmentation. In Proceedings of the IEEE international conference on computer vision (pp. 1635\u20131643).","DOI":"10.1109\/ICCV.2015.191"},{"key":"1862_CR14","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009). ImageNet: A large-scale hierarchical image database. In Computer vision and pattern recognition, 2009. CVPR 2009. IEEE conference on (pp. 248\u2013255). IEEE.","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"2","key":"1862_CR15","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2010). The Pascal visual object classes (VOC) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"1862_CR16","doi-asserted-by":"crossref","unstructured":"Fan, J., Zhang, Z., Song, C., & Tan, T. (2020a). Learning integral objects with intra-class discriminator for weakly-supervised semantic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4283\u20134292).","DOI":"10.1109\/CVPR42600.2020.00434"},{"key":"1862_CR17","doi-asserted-by":"crossref","unstructured":"Fan, J., Zhang, Z., & Tan, T. (2020b). Employing multi-estimations for weakly-supervised semantic segmentation. In European conference on computer vision (pp. 332\u2013348). Springer.","DOI":"10.1007\/978-3-030-58520-4_20"},{"key":"1862_CR18","doi-asserted-by":"crossref","unstructured":"Fan, J., Zhang, Z., Tan, T., Song, C., & Xiao, J. (2020). CIAN: Cross-image affinity net for weakly supervised semantic segmentation. In Proceedings of the AAAI conference on artificial intelligence (vol. 34, pp. 10762\u201310769).","DOI":"10.1609\/aaai.v34i07.6705"},{"key":"1862_CR19","doi-asserted-by":"crossref","unstructured":"Fan, R., Hou, Q., Cheng, M.-M., Yu, G., Martin, R.\u00a0R., & Hu, S.-M. (2018). Associating inter-image salient instances for weakly supervised semantic segmentation. In Proceedings of the European conference on computer vision (ECCV) (pp. 367\u2013383).","DOI":"10.1007\/978-3-030-01240-3_23"},{"issue":"2","key":"1862_CR20","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1023\/B:VISI.0000022288.19776.77","volume":"59","author":"PF Felzenszwalb","year":"2004","unstructured":"Felzenszwalb, P. F., & Huttenlocher, D. P. (2004). Efficient graph-based image segmentation. International Journal of Computer Vision, 59(2), 167\u2013181.","journal-title":"International Journal of Computer Vision"},{"key":"1862_CR21","doi-asserted-by":"crossref","unstructured":"Fu, J., Liu, J., Tian, H., Li, Y., Bao, Y., Fang, Z., & Lu, H. (2018). Dual attention network for scene segmentation. arXiv preprint arXiv:1809.02983.","DOI":"10.1109\/CVPR.2019.00326"},{"key":"1862_CR22","doi-asserted-by":"crossref","unstructured":"Ge, W., Guo, S., Huang, W., & Scott, M.\u00a0R. (2019). Label-PEnet: Sequential label propagation and enhancement networks for weakly supervised instance segmentation. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 3345\u20133354).","DOI":"10.1109\/ICCV.2019.00344"},{"issue":"1","key":"1862_CR23","first-page":"723","volume":"13","author":"A Gretton","year":"2012","unstructured":"Gretton, A., Borgwardt, K. M., Rasch, M. J., Sch\u00f6lkopf, B., & Smola, A. (2012). A kernel two-sample test. The Journal of Machine Learning Research, 13(1), 723\u2013773.","journal-title":"The Journal of Machine Learning Research"},{"key":"1862_CR24","doi-asserted-by":"crossref","unstructured":"Hadsell, R., Chopra, S., & LeCun, Y. (2006). Dimensionality reduction by learning an invariant mapping. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201906) (vol. 2, pp. 1735\u20131742). IEEE.","DOI":"10.1109\/CVPR.2006.100"},{"key":"1862_CR25","doi-asserted-by":"crossref","unstructured":"Hariharan, B., Arbel\u00e1ez, P., Bourdev, L., Maji, S., & Malik, J. (2011). Semantic contours from inverse detectors. In 2011 international conference on computer vision (pp. 991\u2013998). IEEE.","DOI":"10.1109\/ICCV.2011.6126343"},{"key":"1862_CR26","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., & Girshick, R. (2020). Momentum contrast for unsupervised visual representation learning. In Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition (pp. 9729\u20139738).","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"1862_CR27","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask R-CNN. In Proceedings of the IEEE international conference on computer vision (pp. 2961\u20132969).","DOI":"10.1109\/ICCV.2017.322"},{"key":"1862_CR28","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"1862_CR29","unstructured":"H\u00e9naff, O.\u00a0J., Srinivas, A., De\u00a0Fauw, J., Razavi, A., Doersch, C., Eslami, S., & Oord, A. v.\u00a0d. (2019). Data-efficient image recognition with contrastive predictive coding. arXiv preprint arXiv:1905.09272."},{"key":"1862_CR30","doi-asserted-by":"crossref","unstructured":"Hoffer, E. & Ailon, N. (2015). Deep metric learning using triplet network. In International workshop on similarity-based pattern recognition (pp. 84\u201392). Springer.","DOI":"10.1007\/978-3-319-24261-3_7"},{"key":"1862_CR31","doi-asserted-by":"crossref","unstructured":"Hou, Q., Cheng, M.-M., Hu, X., Borji, A., Tu, Z., & Torr, P.\u00a0H. (2017). Deeply supervised salient object detection with short connections. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3203\u20133212).","DOI":"10.1109\/CVPR.2017.563"},{"key":"1862_CR32","unstructured":"Hou, Q., Jiang, P.-T., Wei, Y., & Cheng, M.-M. (2018). Self-erasing network for integral object attention. arXiv preprint arXiv:1810.09821."},{"key":"1862_CR33","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, X., Huang, L., Huang, C., Wei, Y., & Liu, W. (2019). CCNet: Criss-cross attention for semantic segmentation. In Proceedings of the IEEE international conference on computer vision (pp. 603\u2013612).","DOI":"10.1109\/ICCV.2019.00069"},{"key":"1862_CR34","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, X., Wang, J., Liu, W., & Wang, J. (2018). Weakly-supervised semantic segmentation network with deep seeded region growing. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 7014\u20137023).","DOI":"10.1109\/CVPR.2018.00733"},{"key":"1862_CR35","unstructured":"Hung, W.-C., Tsai, Y.-H., Liou, Y.-T., Lin, Y.-Y., & Yang, M.-H. (2018). Adversarial learning for semi-supervised semantic segmentation. arXiv preprint arXiv:1802.07934."},{"key":"1862_CR36","doi-asserted-by":"crossref","unstructured":"Hwang, J., Kim, S., Son, J., & Han, B. (2021). Weakly supervised instance segmentation by deep community learning. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision (pp. 1020\u20131029).","DOI":"10.1109\/WACV48630.2021.00106"},{"key":"1862_CR37","doi-asserted-by":"crossref","unstructured":"Jiang, H., Wang, J., Yuan, Z., Wu, Y., Zheng, N., & Li, S. (2013). Salient object detection: A discriminative regional feature integration approach. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2083\u20132090).","DOI":"10.1109\/CVPR.2013.271"},{"key":"1862_CR38","doi-asserted-by":"crossref","unstructured":"Jiang, P.-T., Hou, Q., Cao, Y., Cheng, M.-M., Wei, Y., & Xiong, H.-K. (2019). Integral object mining via online attention accumulation. In Proceedings of the IEEE international conference on computer vision (pp. 2070\u20132079).","DOI":"10.1109\/ICCV.2019.00216"},{"key":"1862_CR39","doi-asserted-by":"crossref","unstructured":"Jin, Z., Gong, T., Yu, D., Chu, Q., Wang, J., Wang, C., & Shao, J. (2021). Mining contextual information beyond image for semantic segmentation. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 7231\u20137241).","DOI":"10.1109\/ICCV48922.2021.00714"},{"key":"1862_CR40","doi-asserted-by":"crossref","unstructured":"Ke, Z., Qiu, D., Li, K., Yan, Q., & Lau, R.\u00a0W. (2020). Guided collaborative training for pixel-wise semi-supervised learning. In European conference on computer vision (pp. 429\u2013445). Springer.","DOI":"10.1007\/978-3-030-58601-0_26"},{"key":"1862_CR41","doi-asserted-by":"crossref","unstructured":"Khoreva, A., Benenson, R., Hosang, J., Hein, M., & Schiele, B. (2017). Simple does it: Weakly supervised instance and semantic segmentation. In CVPR (vol. 1, p. 3).","DOI":"10.1109\/CVPR.2017.181"},{"key":"1862_CR42","doi-asserted-by":"crossref","unstructured":"Kim, B., Yoo, Y., Rhee, C.\u00a0E., & Kim, J. (2022). Beyond semantic to instance segmentation: Weakly-supervised instance segmentation via semantic knowledge transfer and self-refinement. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4278\u20134287).","DOI":"10.1109\/CVPR52688.2022.00424"},{"key":"1862_CR43","doi-asserted-by":"crossref","unstructured":"Kolesnikov, A. & Lampert, C.\u00a0H. (2016). Seed, expand and constrain: Three principles for weakly-supervised image segmentation. In European conference on computer vision (pp. 695\u2013711). Springer.","DOI":"10.1007\/978-3-319-46493-0_42"},{"key":"1862_CR44","unstructured":"Kr\u00e4henb\u00fchl, P. & Koltun, V. (2011a). Efficient inference in fully connected CRFs with gaussian edge potentials. In Advances in neural information processing systems (pp. 109\u2013117)"},{"key":"1862_CR45","unstructured":"Kr\u00e4henb\u00fchl, P. & Koltun, V. (2011b). Efficient inference in fully connected CRFs with gaussian edge potentials. In Advances in neural information processing systems (pp. 109\u2013117)."},{"key":"1862_CR46","doi-asserted-by":"crossref","unstructured":"Kwak, S., Hong, S., & Han, B. (2017). Weakly supervised semantic segmentation using superpixel pooling network. In Proceedings of the AAAI conference on artificial intelligence (vol. 31).","DOI":"10.1609\/aaai.v31i1.11213"},{"key":"1862_CR47","doi-asserted-by":"crossref","unstructured":"Laradji, I.\u00a0H., Vazquez, D., & Schmidt, M. (2019). Where are the masks: Instance segmentation with image-level supervision. arXiv preprint arXiv:1907.01430.","DOI":"10.1109\/ICIP40778.2020.9190782"},{"key":"1862_CR48","doi-asserted-by":"crossref","unstructured":"Lee, J., Kim, E., Lee, S., Lee, J., & Yoon, S. (2019). FickleNet: Weakly and semi-supervised semantic image segmentation using stochastic inference. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 5267\u20135276).","DOI":"10.1109\/CVPR.2019.00541"},{"key":"1862_CR49","doi-asserted-by":"crossref","unstructured":"Lee, J., Kim, E., & Yoon, S. (2021). Anti-adversarially manipulated attributions for weakly and semi-supervised semantic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4071\u20134080).","DOI":"10.1109\/CVPR46437.2021.00406"},{"key":"1862_CR50","doi-asserted-by":"crossref","unstructured":"Lee, M., Kim, D., & Shim, H. (2022). Threshold matters in WSSS: Manipulating the activation for the robust and accurate segmentation model against thresholds. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4330\u20134339).","DOI":"10.1109\/CVPR52688.2022.00429"},{"key":"1862_CR51","doi-asserted-by":"crossref","unstructured":"Li, Q., Arnab, A., & Torr, P.\u00a0H. (2018). Weakly-and semi-supervised panoptic segmentation. In Proceedings of the European conference on computer vision (ECCV) (pp. 102\u2013118).","DOI":"10.1007\/978-3-030-01267-0_7"},{"key":"1862_CR52","doi-asserted-by":"crossref","unstructured":"Lin, D., Dai, J., Jia, J., He, K., & Sun, J. (2016). ScribbleSup: Scribble-supervised convolutional networks for semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3159\u20133167).","DOI":"10.1109\/CVPR.2016.344"},{"key":"1862_CR53","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., & Belongie, S. (2017). Feature pyramid networks for object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2117\u20132125).","DOI":"10.1109\/CVPR.2017.106"},{"issue":"3","key":"1862_CR54","doi-asserted-by":"publisher","first-page":"1415","DOI":"10.1109\/TPAMI.2020.3023152","volume":"44","author":"Y Liu","year":"2020","unstructured":"Liu, Y., Wu, Y.-H., Wen, P., Shi, Y., Qiu, Y., & Cheng, M.-M. (2020). Leveraging instance-, image-and dataset-level information for weakly supervised instance segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(3), 1415\u20131428.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1862_CR55","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2015). Fully convolutional networks for semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3431\u20133440).","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1862_CR56","doi-asserted-by":"crossref","unstructured":"Maninis, K.-K., Caelles, S., Pont-Tuset, J., & Van\u00a0Gool, L. (2018). Deep extreme cut: From extreme points to object segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 616\u2013625).","DOI":"10.1109\/CVPR.2018.00071"},{"issue":"4","key":"1862_CR57","doi-asserted-by":"publisher","first-page":"1369","DOI":"10.1109\/TPAMI.2019.2960224","volume":"43","author":"S Mittal","year":"2019","unstructured":"Mittal, S., Tatarchenko, M., & Brox, T. (2019). Semi-supervised semantic segmentation with high-and low-level consistency. IEEE Transactions on Pattern Analysis and Machine Intelligence, 43(4), 1369\u20131379.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"12","key":"1862_CR58","doi-asserted-by":"publisher","first-page":"3258","DOI":"10.1049\/ipr2.12558","volume":"16","author":"K Moghalles","year":"2022","unstructured":"Moghalles, K., Li, H.-C., Al-Huda, Z., Raza, A., & Malik, A. (2022). Weakly supervised building semantic segmentation via superpixel-CRF with initial deep seeds guiding. IET Image Processing, 16(12), 3258\u20133267.","journal-title":"IET Image Processing"},{"key":"1862_CR59","doi-asserted-by":"crossref","unstructured":"Papandreou, G., Chen, L.-C., Murphy, K., & Yuille, A.\u00a0L. (2015). Weakly- and semi-supervised learning of a DCNN for semantic image segmentation. arXiv preprint arXiv:1502.02734.","DOI":"10.1109\/ICCV.2015.203"},{"key":"1862_CR60","doi-asserted-by":"crossref","unstructured":"Pathak, D., Kr\u00e4henb\u00fchl, P., & Darrell, T. (2015). Constrained convolutional neural networks for weakly supervised segmentation. In Proceedings of the IEEE international conference on computer vision (pp. 1796\u20131804).","DOI":"10.1109\/ICCV.2015.209"},{"key":"1862_CR61","doi-asserted-by":"crossref","unstructured":"Pinheiro, P.\u00a0O. & Collobert, R. (2015). From image-level to pixel-level labeling with convolutional networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1713\u20131721).","DOI":"10.1109\/CVPR.2015.7298780"},{"issue":"1","key":"1862_CR62","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1109\/TPAMI.2016.2537320","volume":"39","author":"J Pont-Tuset","year":"2016","unstructured":"Pont-Tuset, J., Arbelaez, P., Barron, J. T., Marques, F., & Malik, J. (2016). Multiscale combinatorial grouping for image segmentation and object proposal generation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 39(1), 128\u2013140.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1862_CR63","doi-asserted-by":"crossref","unstructured":"Qian, R., Wei, Y., Shi, H., Li, J., Liu, J., & Huang, T. (2019). Weakly supervised scene parsing with point-based distance metric learning. In Proceedings of the AAAI conference on artificial intelligence (vol. 33, pp. 8843\u20138850).","DOI":"10.1609\/aaai.v33i01.33018843"},{"key":"1862_CR64","doi-asserted-by":"crossref","unstructured":"Ru, L., Zhan, Y., Yu, B., & Du, B. (2022). Learning affinity from attention: End-to-end weakly-supervised semantic segmentation with transformers. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 16846\u201316855).","DOI":"10.1109\/CVPR52688.2022.01634"},{"key":"1862_CR65","doi-asserted-by":"crossref","unstructured":"Schroff, F., Kalenichenko, D., & Philbin, J. (2015). FaceNet: A unified embedding for face recognition and clustering. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 815\u2013823).","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"1862_CR66","unstructured":"Simonyan, K. & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556."},{"key":"1862_CR67","doi-asserted-by":"crossref","unstructured":"Song, C., Huang, Y., Ouyang, W., & Wang, L. (2019). Box-driven class-wise region masking and filling rate guided loss for weakly supervised semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3136\u20133145).","DOI":"10.1109\/CVPR.2019.00325"},{"key":"1862_CR68","unstructured":"Su, H., Deng, J., & Fei-Fei, L. (2012). Crowdsourcing annotations for visual object detection. In Workshops at the twenty-sixth AAAI conference on artificial intelligence."},{"key":"1862_CR69","doi-asserted-by":"crossref","unstructured":"Sun, G., Wang, W., Dai, J., & Van\u00a0Gool, L. (2020). Mining cross-image semantics for weakly supervised semantic segmentation. In European conference on computer vision (pp. 347\u2013365). Springer.","DOI":"10.1007\/978-3-030-58536-5_21"},{"key":"1862_CR70","doi-asserted-by":"crossref","unstructured":"Tang, M., Perazzi, F., Djelouah, A., Ayed, I.\u00a0B., Schroers, C., & Boykov, Y. (2018). On regularized losses for weakly-supervised CNN segmentation. In Proceedings of the European conference on computer vision (ECCV).","DOI":"10.1109\/CVPR.2018.00195"},{"key":"1862_CR71","doi-asserted-by":"crossref","unstructured":"Vernaza, P. & Chandraker, M. (2017). Learning random-walk label propagation for weakly-supervised semantic segmentation. In The IEEE conference on computer vision and pattern recognition (CVPR) (vol 3, p 3).","DOI":"10.1109\/CVPR.2017.315"},{"key":"1862_CR72","doi-asserted-by":"crossref","unstructured":"Wang, W., Zhou, T., Yu, F., Dai, J., Konukoglu, E., & Van\u00a0Gool, L. (2021). Exploring cross-image pixel contrast for semantic segmentation. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 7303\u20137313).","DOI":"10.1109\/ICCV48922.2021.00721"},{"key":"1862_CR73","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/j.neucom.2019.11.019","volume":"381","author":"X Wang","year":"2020","unstructured":"Wang, X., Ma, H., & You, S. (2020). Deep clustering for weakly-supervised semantic segmentation in autonomous driving scenes. Neurocomputing, 381, 20\u201328.","journal-title":"Neurocomputing"},{"key":"1862_CR74","doi-asserted-by":"crossref","unstructured":"Wang, X., You, S., Li, X., & Ma, H. (2018). Weakly-supervised semantic segmentation by iteratively mining common object features. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1354\u20131362).","DOI":"10.1109\/CVPR.2018.00147"},{"key":"1862_CR75","doi-asserted-by":"crossref","unstructured":"Wei, Y., Feng, J., Liang, X., Cheng, M.-M., Zhao, Y., & Yan, S. (2017a). Object region mining with adversarial erasing: A simple classification to semantic segmentation approach. In IEEE CVPR (vol. 1, p. 3).","DOI":"10.1109\/CVPR.2017.687"},{"issue":"11","key":"1862_CR76","doi-asserted-by":"publisher","first-page":"2314","DOI":"10.1109\/TPAMI.2016.2636150","volume":"39","author":"Y Wei","year":"2017","unstructured":"Wei, Y., Liang, X., Chen, Y., Shen, X., Cheng, M.-M., Feng, J., Zhao, Y., & Yan, S. (2017). STC: A simple to complex framework for weakly-supervised semantic segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 39(11), 2314\u20132320.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1862_CR77","doi-asserted-by":"crossref","unstructured":"Wei, Y., Xiao, H., Shi, H., Jie, Z., Feng, J., & Huang, T.\u00a0S. (2018). Revisiting dilated convolution: A simple approach for weakly- and semi-supervised semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 7268\u20137277).","DOI":"10.1109\/CVPR.2018.00759"},{"key":"1862_CR78","doi-asserted-by":"crossref","unstructured":"Wen, Y., Zhang, K., Li, Z., & Qiao, Y. (2016). A discriminative feature learning approach for deep face recognition. In European conference on computer vision (pp. 499\u2013515). Springer.","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"1862_CR79","doi-asserted-by":"crossref","unstructured":"Wu, T., Huang, J., Gao, G., Wei, X., Wei, X., Luo, X., & Liu, C.\u00a0H. (2021). Embedded discriminative attention mechanism for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 16765\u201316774).","DOI":"10.1109\/CVPR46437.2021.01649"},{"key":"1862_CR80","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J. M., & Luo, P. (2021). SegFormer: Simple and efficient design for semantic segmentation with transformers. Advances in Neural Information Processing Systems, 34, 12077\u201312090.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"1862_CR81","doi-asserted-by":"crossref","unstructured":"Xie, J., Hou, X., Ye, K., & Shen, L. (2022). CLIMS: Cross language image matching for weakly supervised semantic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4483\u20134492).","DOI":"10.1109\/CVPR52688.2022.00444"},{"key":"1862_CR82","doi-asserted-by":"crossref","unstructured":"Xing, F.\u00a0Z., Cambria, E., Huang, W.-B., & Xu, Y. (2016). Weakly supervised semantic segmentation with superpixel embedding. In 2016 IEEE international conference on image processing (ICIP) (pp. 1269\u20131273). IEEE.","DOI":"10.1109\/ICIP.2016.7532562"},{"key":"1862_CR83","doi-asserted-by":"crossref","unstructured":"Yang, F., Sun, Q., Jin, H., & Zhou, Z. (2020). Superpixel segmentation with fully convolutional networks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13964\u201313973).","DOI":"10.1109\/CVPR42600.2020.01398"},{"key":"1862_CR84","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108504","volume":"124","author":"S Yi","year":"2022","unstructured":"Yi, S., Ma, H., Wang, X., Hu, T., Li, X., & Wang, Y. (2022). Weakly-supervised semantic segmentation with superpixel guided local and global consistency. Pattern Recognition, 124, 108504.","journal-title":"Pattern Recognition"},{"key":"1862_CR85","unstructured":"Yuan, Y. & Wang, J. (2018). OCNet: Object context network for scene parsing. arXiv preprint arXiv:1809.00916"},{"key":"1862_CR86","unstructured":"Zeng, Y., Zhuge, Y., Lu, H., & Zhang, L. (2019). Joint learning of saliency detection and weakly supervised semantic segmentation. In Proceedings of the IEEE international conference on computer vision (pp. 7223\u20137233)."},{"key":"1862_CR87","doi-asserted-by":"crossref","unstructured":"Zhang, B., Xiao, J., Wei, Y., Sun, M., & Huang, K. (2020). Reliability does matter: An end-to-end weakly supervised semantic segmentation approach. In Proceedings of the AAAI Conference on Artificial Intelligence (vol. 34, pp. 12765\u201312772).","DOI":"10.1609\/aaai.v34i07.6971"},{"key":"1862_CR88","first-page":"3285","volume":"34","author":"F Zhang","year":"2021","unstructured":"Zhang, F., Torr, P., Ranftl, R., & Richter, S. (2021). Looking beyond single images for contrastive semantic segmentation learning. Advances in Neural Information Processing Systems, 34, 3285\u20133297.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"1862_CR89","doi-asserted-by":"crossref","unstructured":"Zhang, S., Liew, J.\u00a0H., Wei, Y., Wei, S., & Zhao, Y. (2020b). Interactive object segmentation with inside-outside guidance. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 12234\u201312244).","DOI":"10.1109\/CVPR42600.2020.01225"},{"key":"1862_CR90","doi-asserted-by":"crossref","unstructured":"Zhang, X., Peng, Z., Zhu, P., Zhang, T., Li, C., Zhou, H., & Jiao, L. (2021b). Adaptive affinity loss and erroneous pseudo-label refinement for weakly supervised semantic segmentation. In Proceedings of the 29th ACM international conference on multimedia (pp. 5463\u20135472).","DOI":"10.1145\/3474085.3475675"},{"key":"1862_CR91","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., & Jia, J. (2017). Pyramid scene parsing network. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2881\u20132890).","DOI":"10.1109\/CVPR.2017.660"},{"key":"1862_CR92","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., & Torralba, A. (2016a). Learning deep features for discriminative localization. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2921\u20132929).","DOI":"10.1109\/CVPR.2016.319"},{"key":"1862_CR93","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., & Torralba, A. (2016b). Semantic understanding of scenes through the ade20k dataset. arXiv preprint arXiv:1608.05442.","DOI":"10.1109\/CVPR.2017.544"},{"key":"1862_CR94","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Zhu, Y., Ye, Q., Qiu, Q., & Jiao, J. (2018). Weakly supervised instance segmentation using class peak response. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3791\u20133800).","DOI":"10.1109\/CVPR.2018.00399"},{"key":"1862_CR95","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Zhou, Y., Xu, H., Ye, Q., Doermann, D., & Jiao, J. (2019). Learning instance activation maps for weakly supervised instance segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 3116\u20133125).","DOI":"10.1109\/CVPR.2019.00323"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-023-01862-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-023-01862-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-023-01862-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T14:09:00Z","timestamp":1698415740000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-023-01862-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,12]]},"references-count":95,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["1862"],"URL":"https:\/\/doi.org\/10.1007\/s11263-023-01862-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,12]]},"assertion":[{"value":"2 September 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 July 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 August 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}