{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:26:15Z","timestamp":1765308375558,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFC2705700,2024YFF1207300"],"award-info":[{"award-number":["2023YFC2705700,2024YFF1207300"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Yunnan Provincial Key Science and Technology Project","award":["202502AS080002"],"award-info":[{"award-number":["202502AS080002"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62225113"],"award-info":[{"award-number":["62225113"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755509","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"4580-4589","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CSDN: CLIP-Driven Similarity-Aligned Distillation Network for Weakly-Supervised Object Localization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7200-9055","authenticated-orcid":false,"given":"Sifan","family":"Zuo","sequence":"first","affiliation":[{"name":"School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3540-5775","authenticated-orcid":false,"given":"Youfa","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0059-8458","authenticated-orcid":false,"given":"Bo","family":"Du","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_36"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475211"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00611"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19918"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00232"},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=YicbFdNTTy","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00288"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00732"},{"key":"e_1_3_2_1_9_1","volume-title":"DIAL: Dense Image-Text ALignment for Weakly Supervised Semantic Segmentation. In European Conference on Computer Vision. Springer, 248-266","author":"Jang Soojin","year":"2024","unstructured":"Soojin Jang, Jungmin Yun, Junehyoung Kwon, Eunju Lee, and Youngbin Kim. 2024. DIAL: Dense Image-Text ALignment for Weakly Supervised Semantic Segmentation. In European Conference on Computer Vision. Springer, 248-266."},{"key":"e_1_3_2_1_10_1","first-page":"231","volume-title":"PT LXXXI","volume":"15139","author":"Jol Sanghyun","year":"2025","unstructured":"Sanghyun Jol, Fei Pane, In-Jae Yu, and Kyungsu Kim. 2025. DHR: Dual Features-Driven Hierarchical Rebalancing in Inter-and Intra-Class Regions for Weakly-Supervised Semantic Segmentation. COMPUTER VISION-ECCV 2024, PT LXXXI, Vol. 15139 (2025), 231-248."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01386"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00682"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i4.28127"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01469"},{"key":"e_1_3_2_1_16_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Bkg6RiCqY7","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_29"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3554398"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00879"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00337"},{"key":"e_1_3_2_1_21_1","first-page":"9109","article-title":"Task-aware weakly supervised object localization with transformer","volume":"45","author":"Meng Meng","year":"2022","unstructured":"Meng Meng, Tianzhu Zhang, Zhe Zhang, Yongdong Zhang, and Feng Wu. 2022. Task-aware weakly supervised object localization with transformer. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 45, 7 (2022), 9109-9121.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01147"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25292"},{"key":"e_1_3_2_1_24_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","unstructured":"Jinhwan Seo Wonho Bae Danica J. Sutherland Junhyug Noh and Daijin Kim. 2022. Object Discovery via Contrastive Learning for Weakly Supervised Object Detection. In ECCV (31). 312-329. https:\/\/doi.org\/10.1007\/978-3-031-19821-2_18","DOI":"10.1007\/978-3-031-19821-2_18"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","unstructured":"Yunhang Shen Rongrong Ji Yan Wang Zhiwei Chen Feng Zheng Feiyue Huang and Yunsheng Wu. 2020. Enabling Deep Residual Networks for Weakly Supervised Object Detection. In ECCV (8). 118-136. https:\/\/doi.org\/10.1007\/978-3-030-58598-3_8","DOI":"10.1007\/978-3-030-58598-3_8"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. 1281-1289","author":"Su Hao","year":"2024","unstructured":"Hao Su and Meng Yang. 2024. A consistency and integration model with adaptive thresholds for weakly supervised object localization. In Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. 1281-1289."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00320"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00711"},{"key":"e_1_3_2_1_31_1","unstructured":"Catherine Wah Steve Branson Peter Welinder Pietro Perona and Serge Belongie. 2011. The caltech-ucsd birds-200-2011 dataset. (2011)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01385"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681710"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00444"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00020"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00669"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01919-2"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00364"},{"key":"e_1_3_2_1_39_1","volume-title":"Frozen CLIP-DINO: a Strong Backbone for Weakly Supervised Semantic Segmentation","author":"Zhang Bingfeng","year":"2025","unstructured":"Bingfeng Zhang, Siyue Yu, Jimin Xiao, Yunchao Wei, and Yao Zhao. 2025. Frozen CLIP-DINO: a Strong Backbone for Weakly Supervised Semantic Segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475675"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00144"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_17"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3309621"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755509","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:22:11Z","timestamp":1765308131000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755509"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":45,"alternative-id":["10.1145\/3746027.3755509","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755509","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}