{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T04:14:04Z","timestamp":1772424844865,"version":"3.50.1"},"reference-count":47,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100018625","name":"Science and Technology Innovation Plan Of Shanghai Science and Technology Commission","doi-asserted-by":"publisher","award":["22511106005"],"award-info":[{"award-number":["22511106005"]}],"id":[{"id":"10.13039\/501100018625","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Image and Vision Computing"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1016\/j.imavis.2025.105893","type":"journal-article","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T07:25:46Z","timestamp":1768202746000},"page":"105893","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Disentangling co-occurrence with class-specific banks for Weakly Supervised Semantic Segmentation"],"prefix":"10.1016","volume":"167","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-6041-3665","authenticated-orcid":false,"given":"Hang","family":"Yao","sequence":"first","affiliation":[]},{"given":"Yuanchen","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Kequan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jide","family":"Li","sequence":"additional","affiliation":[]},{"given":"Chao","family":"Yin","sequence":"additional","affiliation":[]},{"given":"Zihang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xiaoqiang","family":"Li","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.imavis.2025.105893_b1","doi-asserted-by":"crossref","unstructured":"Y. Wu, X. Ye, K. Yang, J. Li, X. Li, DuPL: Dual Student with Trustworthy Progressive Learning for Robust Weakly Supervised Semantic Segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 3534\u20133543.","DOI":"10.1109\/CVPR52733.2024.00339"},{"key":"10.1016\/j.imavis.2025.105893_b2","doi-asserted-by":"crossref","unstructured":"Y. Wu, X. Li, S. Dai, J. Li, T. Liu, S. Xie, Hierarchical Semantic Contrast for Weakly Supervised Semantic Segmentation, in: Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, IJCAI-23, 2023, pp. 1542\u20131550.","DOI":"10.24963\/ijcai.2023\/171"},{"key":"10.1016\/j.imavis.2025.105893_b3","doi-asserted-by":"crossref","unstructured":"Y. Wu, X. Li, J. Li, K. Yang, P. Zhu, S. Zhang, DINO is Also a Semantic Guider: Exploiting Class-aware Affinity for Weakly Supervised Semantic Segmentation, in: Proceedings of the 32nd ACM International Conference on Multimedia, 2024, pp. 1389\u20131397.","DOI":"10.1145\/3664647.3681710"},{"key":"10.1016\/j.imavis.2025.105893_b4","doi-asserted-by":"crossref","unstructured":"D. Lin, J. Dai, J. Jia, K. He, J. Sun, Scribblesup: Scribble-supervised convolutional networks for semantic segmentation, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 3159\u20133167.","DOI":"10.1109\/CVPR.2016.344"},{"key":"10.1016\/j.imavis.2025.105893_b5","doi-asserted-by":"crossref","unstructured":"P. Vernaza, M. Chandraker, Learning random-walk label propagation for weakly-supervised semantic segmentation, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 7158\u20137166.","DOI":"10.1109\/CVPR.2017.315"},{"key":"10.1016\/j.imavis.2025.105893_b6","doi-asserted-by":"crossref","unstructured":"J. Lee, J. Yi, C. Shin, S. Yoon, Bbam: Bounding box attribution map for weakly supervised semantic and instance segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 2643\u20132652.","DOI":"10.1109\/CVPR46437.2021.00267"},{"key":"10.1016\/j.imavis.2025.105893_b7","doi-asserted-by":"crossref","unstructured":"Y. Oh, B. Kim, B. Ham, Background-aware pooling and noise-aware loss for weakly-supervised semantic segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 6913\u20136922.","DOI":"10.1109\/CVPR46437.2021.00684"},{"key":"10.1016\/j.imavis.2025.105893_b8","doi-asserted-by":"crossref","unstructured":"J. Xie, X. Hou, K. Ye, L. Shen, Clims: Cross language image matching for weakly supervised semantic segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 4483\u20134492.","DOI":"10.1109\/CVPR52688.2022.00444"},{"key":"10.1016\/j.imavis.2025.105893_b9","doi-asserted-by":"crossref","unstructured":"H. Kweon, S.H. Yoon, K.J. Yoon, Weakly Supervised Semantic Segmentation via Adversarial Learning of Classifier and Reconstructor, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 11329\u201311339.","DOI":"10.1109\/CVPR52729.2023.01090"},{"key":"10.1016\/j.imavis.2025.105893_b10","doi-asserted-by":"crossref","unstructured":"S.H. Yoon, H. Kwon, H. Kim, K.J. Yoon, Class Tokens Infusion for Weakly Supervised Semantic Segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 3595\u20133605.","DOI":"10.1109\/CVPR52733.2024.00345"},{"key":"10.1016\/j.imavis.2025.105893_b11","doi-asserted-by":"crossref","unstructured":"L. Ru, Y. Zhan, B. Yu, B. Du, Learning affinity from attention: end-to-end weakly-supervised semantic segmentation with transformers, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 16846\u201316855.","DOI":"10.1109\/CVPR52688.2022.01634"},{"key":"10.1016\/j.imavis.2025.105893_b12","doi-asserted-by":"crossref","unstructured":"L. Ru, H. Zheng, Y. Zhan, B. Du, Token Contrast for Weakly-Supervised Semantic Segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 3093\u20133102.","DOI":"10.1109\/CVPR52729.2023.00302"},{"key":"10.1016\/j.imavis.2025.105893_b13","doi-asserted-by":"crossref","unstructured":"J. Lee, S.J. Oh, S. Yun, J. Choe, E. Kim, S. Yoon, Weakly supervised semantic segmentation using out-of-distribution data, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 16897\u201316906.","DOI":"10.1109\/CVPR52688.2022.01639"},{"key":"10.1016\/j.imavis.2025.105893_b14","doi-asserted-by":"crossref","unstructured":"L. Chen, C. Lei, R. Li, S. Li, Z. Zhang, L. Zhang, Fpr: False positive rectification for weakly supervised semantic segmentation, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023, pp. 1108\u20131118.","DOI":"10.1109\/ICCV51070.2023.00108"},{"key":"10.1016\/j.imavis.2025.105893_b15","doi-asserted-by":"crossref","unstructured":"Y. Lin, M. Chen, W. Wang, B. Wu, K. Li, B. Lin, H. Liu, X. He, Clip is also an efficient segmenter: A text-driven approach for weakly supervised semantic segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 15305\u201315314.","DOI":"10.1109\/CVPR52729.2023.01469"},{"key":"10.1016\/j.imavis.2025.105893_b16","series-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"10.1016\/j.imavis.2025.105893_b17","doi-asserted-by":"crossref","unstructured":"Z. Chen, Q. Sun, Extracting class activation maps from non-discriminative features as well, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 3135\u20133144.","DOI":"10.1109\/CVPR52729.2023.00306"},{"key":"10.1016\/j.imavis.2025.105893_b18","doi-asserted-by":"crossref","unstructured":"Z. Cheng, P. Qiao, K. Li, S. Li, P. Wei, X. Ji, L. Yuan, C. Liu, J. Chen, Out-of-candidate rectification for weakly supervised semantic segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 23673\u201323684.","DOI":"10.1109\/CVPR52729.2023.02267"},{"key":"10.1016\/j.imavis.2025.105893_b19","article-title":"Spatial structure constraints for weakly supervised semantic segmentation","author":"Chen","year":"2024","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.imavis.2025.105893_b20","doi-asserted-by":"crossref","unstructured":"Z. Yang, K. Fu, M. Duan, L. Qu, S. Wang, Z. Song, Separate and conquer: Decoupling co-occurrence via decomposition and representation for weakly supervised semantic segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 3606\u20133615.","DOI":"10.1109\/CVPR52733.2024.00346"},{"key":"10.1016\/j.imavis.2025.105893_b21","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.imavis.2025.105893_b22","doi-asserted-by":"crossref","unstructured":"S. Deng, W. Zhuo, J. Xie, L. Shen, Qa-clims: Question-answer cross language image matching for weakly supervised semantic segmentation, in: Proceedings of the 31st ACM International Conference on Multimedia, 2023, pp. 5572\u20135583.","DOI":"10.1145\/3581783.3612148"},{"key":"10.1016\/j.imavis.2025.105893_b23","first-page":"1","article-title":"WeakCLIP: Adapting CLIP for weakly-supervised semantic segmentation","author":"Zhu","year":"2024","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.imavis.2025.105893_b24","series-title":"Hello GPT-4o","author":"Openai","year":"2024"},{"key":"10.1016\/j.imavis.2025.105893_b25","doi-asserted-by":"crossref","unstructured":"J. Ahn, S. Cho, S. Kwak, Weakly supervised learning of instance segmentation with inter-pixel relations, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019, pp. 2209\u20132218.","DOI":"10.1109\/CVPR.2019.00231"},{"key":"10.1016\/j.imavis.2025.105893_b26","series-title":"European Conference on Computer Vision","first-page":"446","article-title":"Max pooling with vision transformers reconciles class and shape in weakly supervised semantic segmentation","author":"Rossetti","year":"2022"},{"key":"10.1016\/j.imavis.2025.105893_b27","unstructured":"J. Hanna, D. Borth, Know Your Attention Maps: Class-specific Token Masking for Weakly Supervised Semantic Segmentation, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2025, pp. 23763\u201323772."},{"key":"10.1016\/j.imavis.2025.105893_b28","doi-asserted-by":"crossref","unstructured":"P.T. Jiang, Y. Yang, Q. Hou, Y. Wei, L2g: A simple local-to-global knowledge transfer framework for weakly supervised semantic segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 16886\u201316896.","DOI":"10.1109\/CVPR52688.2022.01638"},{"key":"10.1016\/j.imavis.2025.105893_b29","doi-asserted-by":"crossref","unstructured":"Y. Du, Z. Fu, Q. Liu, Y. Wang, Weakly supervised semantic segmentation by pixel-to-prototype contrast, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 4320\u20134329.","DOI":"10.1109\/CVPR52688.2022.00428"},{"key":"10.1016\/j.imavis.2025.105893_b30","doi-asserted-by":"crossref","unstructured":"Z. Chen, T. Wang, X. Wu, X.S. Hua, H. Zhang, Q. Sun, Class re-activation maps for weakly-supervised semantic segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 969\u2013978.","DOI":"10.1109\/CVPR52688.2022.00104"},{"key":"10.1016\/j.imavis.2025.105893_b31","doi-asserted-by":"crossref","unstructured":"L. Xu, W. Ouyang, M. Bennamoun, F. Boussaid, D. Xu, Learning multi-modal class-specific tokens for weakly supervised dense object localization, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 19596\u201319605.","DOI":"10.1109\/CVPR52729.2023.01877"},{"key":"10.1016\/j.imavis.2025.105893_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.128540","article-title":"Decoupling foreground and background with siamese ViT networks for weakly-supervised semantic segmentation","volume":"610","author":"Lin","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.imavis.2025.105893_b33","doi-asserted-by":"crossref","unstructured":"F. Tang, Z. Xu, Z. Qu, W. Feng, X. Jiang, Z. Ge, Hunting Attributes: Context Prototype-Aware Learning for Weakly Supervised Semantic Segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 3324\u20133334.","DOI":"10.1109\/CVPR52733.2024.00320"},{"key":"10.1016\/j.imavis.2025.105893_b34","series-title":"Modeling the label distributions for weakly-supervised semantic segmentation","author":"Wu","year":"2025"},{"key":"10.1016\/j.imavis.2025.105893_b35","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110922","article-title":"Complementary branch fusing class and semantic knowledge for robust weakly supervised semantic segmentation","volume":"157","author":"Han","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.imavis.2025.105893_b36","doi-asserted-by":"crossref","unstructured":"N. Araslanov, S. Roth, Single-stage semantic segmentation from image labels, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 4253\u20134262.","DOI":"10.1109\/CVPR42600.2020.00431"},{"issue":"5","key":"10.1016\/j.imavis.2025.105893_b37","doi-asserted-by":"crossref","first-page":"1181","DOI":"10.1007\/s11263-022-01590-z","article-title":"Learning self-supervised low-rank network for single-stage weakly and semi-supervised semantic segmentation","volume":"130","author":"Pan","year":"2022","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.imavis.2025.105893_b38","first-page":"3045","article-title":"Self correspondence distillation for end-to-end weakly-supervised semantic segmentation","volume":"vol. 37","author":"Xu","year":"2023"},{"key":"10.1016\/j.imavis.2025.105893_b39","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110787","article-title":"Enhanced online CAM: Single-stage weakly supervised semantic segmentation via collaborative guidance","volume":"156","author":"Zhang","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.imavis.2025.105893_b40","doi-asserted-by":"crossref","unstructured":"J. Fang, Y. Ning, X. Nie, X. Liu, Z. Cheng, VLHP: Learning Discriminative Vision-Language Hybrid Prototypes for Weakly Supervised Semantic Segmentation, in: Proceedings of the 33rd ACM International Conference on Multimedia, 2025, pp. 2939\u20132948.","DOI":"10.1145\/3746027.3754893"},{"key":"10.1016\/j.imavis.2025.105893_b41","doi-asserted-by":"crossref","unstructured":"J. Wang, T. Dai, B. Zhang, S. Yu, E.G. Lim, J. Xiao, Class Token as Proxy: Optimal Transport-assisted Proxy Learning for Weakly Supervised Semantic Segmentation, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2025, pp. 21645\u201321654.","DOI":"10.1109\/CVPR52734.2025.01402"},{"key":"10.1016\/j.imavis.2025.105893_b42","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","article-title":"The pascal visual object classes (voc) challenge","volume":"88","author":"Everingham","year":"2010","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.imavis.2025.105893_b43","series-title":"Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.imavis.2025.105893_b44","doi-asserted-by":"crossref","unstructured":"J. Long, E. Shelhamer, T. Darrell, Fully convolutional networks for semantic segmentation, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2015, pp. 3431\u20133440.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"10.1016\/j.imavis.2025.105893_b45","series-title":"Imagenet-21k pretraining for the masses","author":"Ridnik","year":"2021"},{"key":"10.1016\/j.imavis.2025.105893_b46","series-title":"Eva-clip: Improved training techniques for clip at scale","author":"Sun","year":"2023"},{"key":"10.1016\/j.imavis.2025.105893_b47","doi-asserted-by":"crossref","DOI":"10.1109\/TPAMI.2024.3404422","article-title":"Mctformer+: Multi-class token transformer for weakly supervised semantic segmentation","author":"Xu","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Image and Vision Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885625004810?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885625004810?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T12:28:51Z","timestamp":1771244931000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885625004810"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":47,"alternative-id":["S0262885625004810"],"URL":"https:\/\/doi.org\/10.1016\/j.imavis.2025.105893","relation":{},"ISSN":["0262-8856"],"issn-type":[{"value":"0262-8856","type":"print"}],"subject":[],"published":{"date-parts":[[2026,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Disentangling co-occurrence with class-specific banks for Weakly Supervised Semantic Segmentation","name":"articletitle","label":"Article Title"},{"value":"Image and Vision Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.imavis.2025.105893","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"105893"}}