{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T04:12:30Z","timestamp":1751515950223,"version":"3.41.0"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,5,28]],"date-time":"2025-05-28T00:00:00Z","timestamp":1748390400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,28]],"date-time":"2025-05-28T00:00:00Z","timestamp":1748390400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62373251","62203306"],"award-info":[{"award-number":["62373251","62203306"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Pattern Anal Applic"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s10044-025-01489-8","type":"journal-article","created":{"date-parts":[[2025,5,28]],"date-time":"2025-05-28T06:44:23Z","timestamp":1748414663000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["WintN-CSG: a weakly supervised semantic segmentation network based on basic multimodal large-scale pre-trained models"],"prefix":"10.1007","volume":"28","author":[{"given":"Haotian","family":"Wen","sequence":"first","affiliation":[]},{"given":"Derui","family":"Ding","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Ying","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,28]]},"reference":[{"key":"1489_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/s10044-023-01207-2","author":"SJ Qu","year":"2024","unstructured":"Qu SJ, Wang Z, Wu J (2024) FBRNet: a feature fusion and border refinement network for real-time semantic segmentation. Pattern Anal Appl. https:\/\/doi.org\/10.1007\/s10044-023-01207-2","journal-title":"Pattern Anal Appl"},{"issue":"1","key":"1489_CR2","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s10044-024-01237-4","volume":"27","author":"XG Hu","year":"2024","unstructured":"Hu XG, Feng J, Gong JL (2024) LFFNet: lightweight feature-enhanced fusion network for real-time semantic segmentation of road scenes. Pattern Anal Appl 27(1):27. https:\/\/doi.org\/10.1007\/s10044-024-01237-4","journal-title":"Pattern Anal Appl"},{"issue":"1","key":"1489_CR3","doi-asserted-by":"publisher","first-page":"2305411","DOI":"10.1080\/21642583.2024.2305411","volume":"12","author":"J Teng","year":"2024","unstructured":"Teng J (2024) A hybrid approach of deep learning to forecast financial performance: from unsupervised to supervised. Syst Sci Control Eng 12(1):2305411","journal-title":"Syst Sci Control Eng"},{"key":"1489_CR4","doi-asserted-by":"crossref","unstructured":"Ahn J, Kwak S (2018) Learning pixel-level semantic affinity with image-level supervision for weakly supervised semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. p 4981\u20134990","DOI":"10.1109\/CVPR.2018.00523"},{"issue":"4","key":"1489_CR5","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/s10044-024-01251-6","volume":"27","author":"YB Feng","year":"2024","unstructured":"Feng YB, Hafiane A, Laurent H (2024) A weakly supervised end-to-end framework for semantic segmentation of cancerous area in whole slide image. Pattern Anal Appl 27(4):35. https:\/\/doi.org\/10.1007\/s10044-024-01251-6","journal-title":"Pattern Anal Appl"},{"issue":"1","key":"1489_CR6","doi-asserted-by":"publisher","first-page":"2328546","DOI":"10.1080\/21642583.2024.2328546","volume":"12","author":"J Zhang","year":"2024","unstructured":"Zhang J, Zhang Z, Zhang H et al (2024) MASNet: mixed attention Siamese network for visual object tracking. Syst Sci Control Eng 12(1):2328546","journal-title":"Syst Sci Control Eng"},{"key":"1489_CR7","doi-asserted-by":"crossref","unstructured":"Bearman A, Russakovsky O, Ferrari V et al (2016) What\u2019s the point: Semantic segmentation with point supervision. In: Proceedings of the European Conference on Computer Vision. p 549\u2013565","DOI":"10.1007\/978-3-319-46478-7_34"},{"key":"1489_CR8","doi-asserted-by":"crossref","unstructured":"Lin D, Dai J, Jia J et al (2016) Scribblesup: Scribble-supervised convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. p 3159\u20133167","DOI":"10.1109\/CVPR.2016.344"},{"key":"1489_CR9","doi-asserted-by":"crossref","unstructured":"Vernaza P, Chandraker M (2017) Learning random-walk label propagation for weakly-supervised semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, p 7158\u20137166","DOI":"10.1109\/CVPR.2017.315"},{"key":"1489_CR10","doi-asserted-by":"crossref","unstructured":"[5] Papandreou G, Chen LC, Murphy KP et al (2015) Weakly and semi-supervised learning of a deep convolutional network for semantic image segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, p 1742\u20131750","DOI":"10.1109\/ICCV.2015.203"},{"key":"1489_CR11","doi-asserted-by":"crossref","unstructured":"Dai J, He K, Sun J (2015) Boxsup: Exploiting bounding boxes to supervise convolutional networks for semantic segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, p 1635\u20131643","DOI":"10.1109\/ICCV.2015.191"},{"key":"1489_CR12","doi-asserted-by":"crossref","unstructured":"Zhou B, Khosla A, Lapedriza A et al (2016) Learning deep features for discriminative localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, p 2921\u20132929","DOI":"10.1109\/CVPR.2016.319"},{"key":"1489_CR13","doi-asserted-by":"crossref","unstructured":"Selvaraju RR, Cogswell M, Das A et al (2017) Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, p 618\u2013626","DOI":"10.1109\/ICCV.2017.74"},{"issue":"12","key":"1489_CR14","doi-asserted-by":"publisher","first-page":"25419","DOI":"10.1109\/TITS.2022.3141107","volume":"23","author":"Y Li","year":"2022","unstructured":"Li Y, Sun J, Li Y (2022) Weakly-supervised semantic segmentation network with iterative dcrf. IEEE Trans Intell Transp Syst 23(12):25419\u201325426","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"1489_CR15","doi-asserted-by":"crossref","unstructured":"Sun G, Wang W, Dai J et al (2020) Mining cross-image semantics for weakly supervised semantic segmentation. In: Proceedings of the 16th European Conference on the Computer Vision, Glasgow, UK, Part II 16, p 347\u2013365","DOI":"10.1007\/978-3-030-58536-5_21"},{"key":"1489_CR16","doi-asserted-by":"crossref","unstructured":"Jiang PT, Hou Q, Cao Y et al (2019) Integral object mining via online attention accumulation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, p 2070\u20132079","DOI":"10.1109\/ICCV.2019.00216"},{"key":"1489_CR17","doi-asserted-by":"crossref","unstructured":"Araslanov N, Roth S (2020) Single-stage semantic segmentation from image labels. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 4253\u20134262","DOI":"10.1109\/CVPR42600.2020.00431"},{"key":"1489_CR18","first-page":"12765","volume":"34","author":"B Zhang","year":"2020","unstructured":"Zhang B, Xiao J, Wei Y et al (2020) Reliability does matter: an end-to-end weakly supervised semantic segmentation approach. Proc AAAI Conf Artif Intell 34:12765\u201312772","journal-title":"Proc AAAI Conf Artif Intell"},{"issue":"4","key":"1489_CR19","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1007\/s10044-024-01378-6","volume":"27","author":"R Gong","year":"2024","unstructured":"Gong R, Zhang Y, Zhang YH et al (2024) Demsasa: micro-video scene classification based on denoising multi-shots association self-attention. Pattern Anal Appl 27(4):155. https:\/\/doi.org\/10.1007\/s10044-024-01378-6","journal-title":"Pattern Anal Appl"},{"key":"1489_CR20","doi-asserted-by":"crossref","unstructured":"Wu T, Huang J, Gao G et al (2021) Embedded discriminative attention mechanism for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p. 16765\u201316774","DOI":"10.1109\/CVPR46437.2021.01649"},{"key":"1489_CR21","doi-asserted-by":"crossref","unstructured":"Lin CS, Wang CY, Wang YC et al (2024) Semples: Semantic prompt learning for weakly-supervised semantic segmentation. arXiv preprint arXiv:2401.11791. Accessed 12 Jan 2025","DOI":"10.1109\/WACV61041.2025.00849"},{"key":"1489_CR22","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R et al (2009) Imagenet: a large-scale hierarchical image database. In: the 2009 IEEE Conference on Computer Vision and Pattern Recognition, p 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1489_CR23","unstructured":"Radford A, Kim JW, Hallacy C et al (2021) Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, p 8748\u20138763"},{"key":"1489_CR24","doi-asserted-by":"crossref","unstructured":"Kirillov A, Mintun E, Ravi N et al (2023) Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, p 4015\u20134026","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"1489_CR25","doi-asserted-by":"crossref","unstructured":"Liu S, Zeng Z, Ren T et al (2025) Grounding dino: Marrying dino with grounded pre-training for open-set object detection. In: European Conference on Computer Vision, p 38\u201355","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"1489_CR26","doi-asserted-by":"crossref","unstructured":"Lin Y, Chen M, Wang W et al (2023) Clip is also an efficient segmenter: A text-driven approach for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 15305\u201315314","DOI":"10.1109\/CVPR52729.2023.01469"},{"key":"1489_CR27","doi-asserted-by":"crossref","unstructured":"Yang X, Gong X (2024) Foundation model assisted weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, p 523\u2013532","DOI":"10.1109\/WACV57701.2024.00058"},{"key":"1489_CR28","unstructured":"Sun W, Liu Z, Zhang Y et al (2023) An alternative to wsss? An empirical study of the segment anything model (sam) on weakly-supervised semantic segmentation problems. arXiv preprint arXiv:2305.01586. Accessed 18 Jan 2023"},{"key":"1489_CR29","unstructured":"Zhu L, Li Y, Fang J et al (2023) Weaktr: Exploring plain vision transformer for weakly-supervised semantic segmentation. arXiv preprint arXiv:2304.01184. Accessed 27 April 2023"},{"key":"1489_CR30","doi-asserted-by":"crossref","unstructured":"Xie J, Hou X, Ye K et al (2022) Clims: Cross language image matching for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 4483\u20134492","DOI":"10.1109\/CVPR52688.2022.00444"},{"key":"1489_CR31","doi-asserted-by":"crossref","unstructured":"Wang Y, Zhang J, Kan M et al (2020) Self-supervised equivariant attention mechanism for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 12275\u201312284","DOI":"10.1109\/CVPR42600.2020.01229"},{"key":"1489_CR32","doi-asserted-by":"crossref","unstructured":"Chang YT, Wang Q, Hung WC et al (2020) Weakly-supervised semantic segmentation via sub-category exploration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 8991\u20139000","DOI":"10.1109\/CVPR42600.2020.00901"},{"key":"1489_CR33","doi-asserted-by":"crossref","unstructured":"Ru L, Du B, Wu C (2021) Learning visual words for weakly-supervised semantic segmentation. In: Proceedings of the 30th International Joint Conference on Artificial Intelligence, p 982\u2013988","DOI":"10.24963\/ijcai.2021\/136"},{"key":"1489_CR34","doi-asserted-by":"crossref","unstructured":"Deng F, Ming Y, Lyu B (2024) CCE-Net: Causal convolution embedding network for streaming automatic speech recognition. Int J Netw Dyn Intell. 3(3) art no 100019","DOI":"10.53941\/ijndi.2024.100019"},{"issue":"1","key":"1489_CR35","volume":"3","author":"S Hu","year":"2024","unstructured":"Hu S, Lu J, Zhou S (2024) Learning regression distribution: information diffusion from template to search for visual object tracking. Int J Netw Dyn Intell 3(1):100006","journal-title":"Int J Netw Dyn Intell"},{"issue":"4","key":"1489_CR36","volume":"2","author":"X Yue","year":"2023","unstructured":"Yue X, Chen J, Zhong G (2023) Metal surface defect detection based on metal-YOLOX. Int J Netw Dyn Intell 2(4):100020","journal-title":"Int J Netw Dyn Intell"},{"key":"1489_CR37","doi-asserted-by":"crossref","unstructured":"Kweon H, Yoon SH, Kim H et al (2021) Unlocking the potential of ordinary classifier: Class-specific adversarial erasing framework for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, p 6994\u20137003","DOI":"10.1109\/ICCV48922.2021.00691"},{"key":"1489_CR38","doi-asserted-by":"crossref","unstructured":"Wei Y, Feng J, Liang X et al (2017) Object region mining with adversarial erasing: A simple classification to semantic segmentation approach. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, p 1568\u20131576","DOI":"10.1109\/CVPR.2017.687"},{"key":"1489_CR39","unstructured":"Hou Q, Jiang P, Wei Y et al (2018) Self-erasing network for integral object attention. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, p 547\u2013557"},{"key":"1489_CR40","doi-asserted-by":"crossref","unstructured":"Yao Y, Chen T, Xie GS et al (2021) Non-salient region object mining for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 2623\u20132632","DOI":"10.1109\/CVPR46437.2021.00265"},{"key":"1489_CR41","doi-asserted-by":"crossref","unstructured":"Du Y, Fu Z, Liu Q et al (2022) Weakly supervised semantic segmentation by pixel-to-prototype contrast. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 4320\u20134329","DOI":"10.1109\/CVPR52688.2022.00428"},{"key":"1489_CR42","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y et al (2021) Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, p 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1489_CR43","doi-asserted-by":"crossref","unstructured":"Hariharan B, Arbel\u00e1ez, P, Bourdev L et al (2011) Semantic contours from inverse detectors. In: International Conference on Computer Vision, p 991\u2013998","DOI":"10.1109\/ICCV.2011.6126343"},{"key":"1489_CR44","unstructured":"Chen T, Mai Z, Li R et al (2023) Segment anything model (SAM) enhanced pseudo labels for weakly supervised semantic segmentation. arXiv preprint arXiv:2305.05803. Accessed 3 Nov 2023"},{"key":"1489_CR45","unstructured":"Jiang PT, Yang Y (2023) Segment anything is a good pseudo-label generator for weakly supervised semantic segmentation. arXiv preprint arXiv:2305.01275. Accessed 2 May 2023"},{"key":"1489_CR46","doi-asserted-by":"crossref","unstructured":"Lee J, Kim E, Yoon S (2021) Anti-adversarially manipulated attributions for weakly and semi-supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 4071\u20134080. Accessed 16 April 2021","DOI":"10.1109\/CVPR46437.2021.00406"},{"key":"1489_CR47","doi-asserted-by":"crossref","unstructured":"Xu L, Ouyang W, Bennamoun M et al (2022) Multi-class token transformer for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 4310\u20134319. Accessed 6 Mar 2022","DOI":"10.1109\/CVPR52688.2022.00427"},{"key":"1489_CR48","doi-asserted-by":"crossref","unstructured":"Rossetti S, Zappia D, Sanzari M et al (2022) Max pooling with vision transformers reconciles class and shape in weakly supervised semantic segmentation. In: Proceedings of the European Conference on Computer Vision, p 446\u2013463","DOI":"10.1007\/978-3-031-20056-4_26"},{"key":"1489_CR49","doi-asserted-by":"crossref","unstructured":"Murugesan B, Hussain R, Bhattacharya R et al (2024) Prompting classes: Exploring the power of prompt class learning in weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, p 291\u2013302","DOI":"10.1109\/WACV57701.2024.00036"},{"key":"1489_CR50","first-page":"10762","volume":"34","author":"J Fan","year":"2020","unstructured":"Fan J, Zhang Z, Tan T et al (2020) Cross-image affinity net for weakly supervised semantic segmentation. Proc AAAI Conf Artif Intell 34:10762\u201310769","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"1489_CR51","unstructured":"Touvron H, Cord M, Douze M et al (2021) Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, p 10347\u201310357. Accessed 15 Jan 2021"},{"key":"1489_CR52","doi-asserted-by":"crossref","unstructured":"Lee S, Lee M, Lee J et al (2021) Railroad is not a train: Saliency as pseudo-pixel supervision for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 5495\u20135505","DOI":"10.1109\/CVPR46437.2021.00545"},{"key":"1489_CR53","doi-asserted-by":"crossref","unstructured":"Jiang PT, Yang Y, Hou Q et al (2022) L2G: A simple local-to-global knowledge transfer framework for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 16886\u201316896","DOI":"10.1109\/CVPR52688.2022.01638"},{"key":"1489_CR54","doi-asserted-by":"crossref","unstructured":"Chen Q, Yang L, Lai JH et al (2022) Self-supervised image-specific prototype exploration for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 4288\u20134298","DOI":"10.1109\/CVPR52688.2022.00425"},{"key":"1489_CR55","doi-asserted-by":"crossref","unstructured":"Ru L, Zheng H, Zhan Y et al (2023) Token contrast for weakly-supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 3093\u20133102","DOI":"10.1109\/CVPR52729.2023.00302"},{"key":"1489_CR56","doi-asserted-by":"crossref","unstructured":"Cheng Z, Qiao P, Li K et al (2023) Out-of-candidate rectification for weakly supervised semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, p 23673\u201323684","DOI":"10.1109\/CVPR52729.2023.02267"}],"container-title":["Pattern Analysis and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-025-01489-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10044-025-01489-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-025-01489-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T16:41:54Z","timestamp":1751474514000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10044-025-01489-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,28]]},"references-count":56,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["1489"],"URL":"https:\/\/doi.org\/10.1007\/s10044-025-01489-8","relation":{},"ISSN":["1433-7541","1433-755X"],"issn-type":[{"type":"print","value":"1433-7541"},{"type":"electronic","value":"1433-755X"}],"subject":[],"published":{"date-parts":[[2025,5,28]]},"assertion":[{"value":"2 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no potential Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"111"}}