{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:10:14Z","timestamp":1765339814737,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":74,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["82261138629, 62206180"],"award-info":[{"award-number":["82261138629, 62206180"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong Basic and Applied Basic Research Foundation","award":["2023A1515010688"],"award-info":[{"award-number":["2023A1515010688"]}]},{"name":"Guangdong Provincial Key Laboratory","award":["2023B1212060076"],"award-info":[{"award-number":["2023B1212060076"]}]},{"DOI":"10.13039\/501100015805","name":"Shenzhen Municipal Science and Technology Innovation Council","doi-asserted-by":"publisher","award":["JCYJ20220531101412030"],"award-info":[{"award-number":["JCYJ20220531101412030"]}],"id":[{"id":"10.13039\/501100015805","id-type":"DOI","asserted-by":"publisher"}]},{"name":"XJTLU Research Development Funds","award":["RDF-23-01-053"],"award-info":[{"award-number":["RDF-23-01-053"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755285","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:51Z","timestamp":1761377211000},"page":"4020-4029","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["DisFaceRep: Representation Disentanglement for Co-occurring Facial Components in Weakly Supervised Face Parsing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6813-7666","authenticated-orcid":false,"given":"Xiaoqin","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8728-2842","authenticated-orcid":false,"given":"Xianxu","family":"Hou","sequence":"additional","affiliation":[{"name":"School of AI and Advanced Computing, Xi'an Jiaotong-Liverpool University, Suzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0520-020X","authenticated-orcid":false,"given":"Meidan","family":"Ding","sequence":"additional","affiliation":[{"name":"School of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7516-9546","authenticated-orcid":false,"given":"Junliang","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Electrical and Electronic Engineering, The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1352-2952","authenticated-orcid":false,"given":"Kaijun","family":"Deng","sequence":"additional","affiliation":[{"name":"School of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5678-4500","authenticated-orcid":false,"given":"Jinheng","family":"Xie","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1420-0815","authenticated-orcid":false,"given":"Linlin","family":"Shen","sequence":"additional","affiliation":[{"name":"Computer Vision Institute, School of Artificial Intelligence, Shenzhen University, Shenzhen, China and Guangdong Provincial Key Laboratory of Intelligent Information Processing, Shenzhen University, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00231"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00523"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_34"},{"key":"e_1_3_2_1_4_1","volume-title":"Semantic image segmentation with deep convolutional nets and fully connected crfs. arXiv preprint arXiv:1412.7062","author":"Chen Liang-Chieh","year":"2014","unstructured":"Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy, and Alan L Yuille. 2014. Semantic image segmentation with deep convolutional nets and fully connected crfs. arXiv preprint arXiv:1412.7062 (2014)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00104"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.191"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_9_1","volume-title":"Christopher KI Williams, John Winn, and Andrew Zisserman.","author":"Everingham Mark","year":"2010","unstructured":"Mark Everingham, Luc Van Gool, Christopher KI Williams, John Winn, and Andrew Zisserman. 2010. The pascal visual object classes (voc) challenge. International journal of computer vision, Vol. 88 (2010), 303-338."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00434"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12268"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-49409-8_14"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00216"},{"key":"e_1_3_2_1_14_1","volume-title":"Recurseed and edgepredictmix: Single-stage learning is sufficient for weakly-supervised semantic segmentation. arXiv preprint arXiv:2204.06754","author":"Jo Sanghyun","year":"2022","unstructured":"Sanghyun Jo, In-Jae Yu, and Kyungsu Kim. 2022. Recurseed and edgepredictmix: Single-stage learning is sufficient for weakly-supervised semantic segmentation. arXiv preprint arXiv:2204.06754 (2022)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00063"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_42"},{"key":"e_1_3_2_1_17_1","volume-title":"Efficient inference in fully connected crfs with gaussian edge potentials. Advances in neural information processing systems","author":"Kr\u00e4henb\u00fchl Philipp","year":"2011","unstructured":"Philipp Kr\u00e4henb\u00fchl and Vladlen Koltun. 2011. Efficient inference in fully connected crfs with gaussian edge potentials. Advances in neural information processing systems, Vol. 24 (2011)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00691"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01090"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00559"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16294"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681287"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.624"},{"key":"e_1_3_2_1_24_1","volume-title":"International conference on learning representations.","author":"Liang-Chieh Chen","year":"2015","unstructured":"Chen Liang-Chieh, George Papandreou, Iasonas Kokkinos, Kevin Murphy, and Alan Yuille. 2015. Semantic image segmentation with deep convolutional nets and fully connected crfs. In International conference on learning representations."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.344"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00580"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01469"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2021.104190"},{"key":"e_1_3_2_1_29_1","volume-title":"Face parsing via recurrent propagation. arXiv preprint arXiv:1708.01936","author":"Liu Sifei","year":"2017","unstructured":"Sifei Liu, Jianping Shi, Ji Liang, and Ming-Hsuan Yang. 2017. Face parsing via recurrent propagation. arXiv preprint arXiv:1708.01936 (2017)."},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 3451-3459","author":"Liu Sifei","year":"2015","unstructured":"Sifei Liu, Jimei Yang, Chang Huang, and Ming-Hsuan Yang. 2015. Multi-objective convolutional learning for face labeling. In Proceedings of the IEEE conference on computer vision and pattern recognition. 3451-3459."},{"key":"e_1_3_2_1_31_1","volume-title":"European Conference on Computer Vision. Springer, 38-55","author":"Liu Shilong","year":"2024","unstructured":"Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Qing Jiang, Chunyuan Li, Jianwei Yang, Hang Su, et al., 2024. Grounding dino: Marrying dino with grounded pre-training for open-set object detection. In European Conference on Computer Vision. Springer, 38-55."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6832"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3023152"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.3390\/app10093135"},{"key":"e_1_3_2_1_36_1","volume-title":"2012 IEEE Conference on Computer Vision and Pattern Recognition. IEEE, 2480-2487","author":"Luo Ping","year":"2012","unstructured":"Ping Luo, Xiaogang Wang, and Xiaoou Tang. 2012. Hierarchical face parsing via deep learning. In 2012 IEEE Conference on Computer Vision and Pattern Recognition. IEEE, 2480-2487."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i6.32661"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00728"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2973812"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.203"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.209"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298780"},{"key":"e_1_3_2_1_43_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01875"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20056-4_26"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02009"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.74"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.447"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_21"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3113780"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58610-2_16"},{"key":"e_1_3_2_1_52_1","volume-title":"International conference on machine learning. PMLR, 10347-10357","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In International conference on machine learning. PMLR, 10347-10357."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.315"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01229"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.420"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19818-2_12"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00339"},{"key":"e_1_3_2_1_58_1","volume-title":"Wider or deeper: Revisiting the resnet model for visual recognition. Pattern recognition","author":"Wu Zifeng","year":"2019","unstructured":"Zifeng Wu, Chunhua Shen, and Anton Van Den Hengel. 2019. Wider or deeper: Revisiting the resnet model for visual recognition. Pattern recognition, Vol. 90 (2019), 119-133."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00444"},{"key":"e_1_3_2_1_60_1","volume-title":"Mctformer: Multi-class token transformer for weakly supervised semantic segmentation","author":"Xu Lian","year":"2024","unstructured":"Lian Xu, Mohammed Bennamoun, Farid Boussaid, Hamid Laga, Wanli Ouyang, and Dan Xu. 2024. Mctformer: Multi-class token transformer for weakly supervised semantic segmentation. IEEE transactions on pattern analysis and machine intelligence (2024)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00427"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00346"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i9.33018"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19818-2_19"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00364"},{"key":"e_1_3_2_1_66_1","volume-title":"Frozen CLIP-DINO: a Strong Backbone for Weakly Supervised Semantic Segmentation","author":"Zhang Bingfeng","year":"2025","unstructured":"Bingfeng Zhang, Siyue Yu, Jimin Xiao, Yunchao Wei, and Yao Zhao. 2025. Frozen CLIP-DINO: a Strong Backbone for Weakly Supervised Semantic Segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00715"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00144"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00412"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01814"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"e_1_3_2_1_72_1","volume-title":"Face parsing via a fully-convolutional continuous CRF neural network. arXiv preprint arXiv:1708.03736","author":"Zhou Lei","year":"2017","unstructured":"Lei Zhou, Zhi Liu, and Xiangjian He. 2017. Face parsing via a fully-convolutional continuous CRF neural network. arXiv preprint arXiv:1708.03736 (2017)."},{"key":"e_1_3_2_1_73_1","first-page":"222","volume-title":"ISNN 2015, Jeju, South Korea, October 15-18, 2015, Proceedings 12","author":"Zhou Yisu","year":"2015","unstructured":"Yisu Zhou, Xiaolin Hu, and Bo Zhang. 2015. Interlinked convolutional neural networks for face parsing. In Advances in Neural Networks-ISNN 2015: 12th International Symposium on Neural Networks, ISNN 2015, Jeju, South Korea, October 15-18, 2015, Proceedings 12. Springer, 222-231."},{"key":"e_1_3_2_1_74_1","volume-title":"International Journal of Computer Vision","author":"Zhu Lianghui","year":"2024","unstructured":"Lianghui Zhu, Xinggang Wang, Jiapei Feng, Tianheng Cheng, Yingyue Li, Bo Jiang, Dingwen Zhang, and Junwei Han. 2024. WeakCLIP: Adapting CLIP for Weakly-Supervised Semantic Segmentation. International Journal of Computer Vision (2024), 1-21."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755285","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:07:59Z","timestamp":1765339679000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755285"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":74,"alternative-id":["10.1145\/3746027.3755285","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755285","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}