{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:59:48Z","timestamp":1775066388675,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The Guangzhou-HKUST(GZ) Joint Funding Program","award":["2023A03J0671"],"award-info":[{"award-number":["2023A03J0671"]}]},{"name":"The Nansha Key Area Science and Technology Project","award":["2023ZD003"],"award-info":[{"award-number":["2023ZD003"]}]},{"name":"The InnoHK funding launched by Innovation and Technology Commission, Hong Kong SAR, the Guangzhou Industrial Information and Intelligent Key Laboratory Project","award":["2024A03J0628"],"award-info":[{"award-number":["2024A03J0628"]}]},{"name":"Guangzhou-HKUST(GZ) Joint Funding Program","award":["2024A03J0618"],"award-info":[{"award-number":["2024A03J0618"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681192","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"5527-5536","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Language-Driven Interactive Shadow Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9726-4253","authenticated-orcid":false,"given":"Hongqiu","family":"Wang","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5272-5062","authenticated-orcid":false,"given":"Wei","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5398-7847","authenticated-orcid":false,"given":"Haipeng","family":"Zhou","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7868-1100","authenticated-orcid":false,"given":"Huihui","family":"Xu","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5466-8119","authenticated-orcid":false,"given":"Shaozhi","family":"Wu","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3871-663X","authenticated-orcid":false,"given":"Lei","family":"Zhu","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou) &amp; The Hong Kong University of Science and Technology, Guangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386496"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00493"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00274"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00565"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1201775.882298"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612482"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2001.948679"},{"key":"e_1_3_2_1_8_1","volume-title":"MeViS: A Large-scale Benchmark for Video Segmentation with Motion Expressions. arXiv preprint arXiv:2308.08544","author":"Ding Henghui","year":"2023","unstructured":"Henghui Ding, Chang Liu, Shuting He, Xudong Jiang, and Chen Change Loy. 2023. MeViS: A Large-scale Benchmark for Video Segmentation with Motion Expressions. arXiv preprint arXiv:2308.08544 (2023)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00491"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3150959"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475199"},{"key":"e_1_3_2_1_12_1","volume-title":"On the removal of shadows from images","author":"Finlayson Graham D","year":"2005","unstructured":"Graham D Finlayson, Steven D Hordley, Cheng Lu, and Mark S Drew. 2005. On the removal of shadows from images. IEEE transactions on pattern analysis and machine intelligence, Vol. 28, 1 (2005), 59--68."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995725"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475663"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475310"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings, Part I 14","author":"Hu Ronghang","year":"2016","unstructured":"Ronghang Hu, Marcus Rohrbach, and Trevor Darrell. 2016. Segmentation from natural language expressions. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part I 14. Springer, 108--124."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00778"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126331"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1086"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.249"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings, Part II 11","author":"Lalonde Jean-Franccois","year":"2010","unstructured":"Jean-Franccois Lalonde, Alexei A Efros, and Srinivasa G Narasimhan. 2010. Detecting ground shadows in outdoor consumer photographs. In Computer Vision--ECCV 2010: 11th European Conference on Computer Vision, Heraklion, Crete, Greece, September 5--11, 2010, Proceedings, Part II 11. Springer, 322--335."},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings, Part XI 16","author":"Le Hieu","year":"2020","unstructured":"Hieu Le and Dimitris Samaras. 2020. From shadow segmentation to shadow removal. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part XI 16. Springer, 264--281."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02032"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475235"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.143"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01007"},{"key":"e_1_3_2_1_29_1","first-page":"4761","article-title":"Cross-modal progressive comprehension for referring segmentation","volume":"44","author":"Liu Si","year":"2021","unstructured":"Si Liu, Tianrui Hui, et al. 2021. Cross-modal progressive comprehension for referring segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 9 (2021), 4761--4775.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_30_1","volume-title":"Rotated Multi-Scale Interaction Network for Referring Remote Sensing Image Segmentation. arXiv preprint arXiv:2312.12470","author":"Liu Sihan","year":"2023","unstructured":"Sihan Liu, Yiwei Ma, Xiaoqing Zhang, Haowei Wang, Jiayi Ji, Xiaoshuai Sun, and Rongrong Ji. 2023. Rotated Multi-Scale Interaction Network for Referring Remote Sensing Image Segmentation. arXiv preprint arXiv:2312.12470 (2023)."},{"key":"e_1_3_2_1_31_1","volume-title":"Multi-scale Promoted Self-adjusting Correlation Learning for Facial Action Unit Detection. arXiv preprint arXiv:2308.07770","author":"Liu Xin","year":"2023","unstructured":"Xin Liu, Kaishen Yuan, Xuesong Niu, Jingang Shi, Zitong Yu, Huanjing Yue, and Jingyu Yang. 2023. Multi-scale Promoted Self-adjusting Correlation Learning for Facial Action Unit Detection. arXiv preprint arXiv:2308.07770 (2023)."},{"key":"e_1_3_2_1_32_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2020.3041100"},{"key":"e_1_3_2_1_34_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Hao Lu Xuesong Niu Jiyao Wang Yin Wang Qingyong Hu Jiaqi Tang Yuting Zhang Kaishen Yuan Bin Huang Zitong Yu et al. 2024. Gpt as psychologist? preliminary evaluations for gpt-4v on visual affective computing. arXiv preprint arXiv:2403.05916 (2024).","DOI":"10.1109\/CVPRW63382.2024.00037"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475531"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00091"},{"key":"e_1_3_2_1_38_1","volume-title":"V-net: Fully convolutional neural networks for volumetric medical image segmentation. In 2016 fourth international conference on 3D vision (3DV). Ieee, 565--571.","author":"Milletari Fausto","year":"2016","unstructured":"Fausto Milletari, Nassir Navab, and Seyed-Ahmad Ahmadi. 2016. V-net: Fully convolutional neural networks for volumetric medical image segmentation. In 2016 fourth international conference on 3D vision (3DV). Ieee, 565--571."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995585"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00075"},{"key":"e_1_3_2_1_41_1","volume-title":"Cast shadow segmentation using invariant color features. Computer vision and image understanding","author":"Salvador Elena","year":"2004","unstructured":"Elena Salvador, Andrea Cavallaro, and Touradj Ebrahimi. 2004. Cast shadow segmentation using invariant color features. Computer vision and image understanding, Vol. 95, 2 (2004), 238--259."},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings, Part XV 16","author":"Seo Seonguk","year":"2020","unstructured":"Seonguk Seo, Joon-Young Lee, and Bohyung Han. 2020. Urvos: Unified referring video object segmentation network with a large-scale benchmark. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part XV 16. Springer, 208--223."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298818"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2015.09.006"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.387"},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings, Part VI 14","author":"Yago Vicente Tom\u00e1s F","year":"2016","unstructured":"Tom\u00e1s F Yago Vicente, Le Hou, Chen-Ping Yu, Minh Hoai, and Dimitris Samaras. 2016. Large-scale training of shadow detectors with noisily-annotated shadow examples. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part VI 14. Springer, 816--832."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3412923"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161478"},{"key":"e_1_3_2_1_49_1","volume-title":"Advancing UWF-SLO Vessel Segmentation with Source-Free Active Domain Adaptation and a Novel Multi-Center Dataset. arXiv preprint arXiv:2406.13645","author":"Wang Hongqiu","year":"2024","unstructured":"Hongqiu Wang, Xiangde Luo, Wu Chen, Qingqing Tang, Mei Xin, Qiong Wang, and Lei Zhu. 2024. Advancing UWF-SLO Vessel Segmentation with Source-Free Active Domain Adaptation and a Novel Multi-Center Dataset. arXiv preprint arXiv:2406.13645 (2024)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3426953"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-45087-7_8"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00007"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00195"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2015.58"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00259"},{"key":"e_1_3_2_1_56_1","volume-title":"European Conference on Computer Vision.","author":"Wu Hongtao","year":"2024","unstructured":"Hongtao Wu, Yijun Yang, Angelica Aviles-Rivero, Jingjing Ren, Sixiang chen, Haoyu Chen, and Lei Zhu. 2024. Semi-Supervised Video Desnowing Network via Temporal Decoupling Experts and Distribution-Driven Contrastive Regularization. In European Conference on Computer Vision."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612001"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680916"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00492"},{"key":"e_1_3_2_1_60_1","volume-title":"Video Instance Shadow Detection. arXiv preprint arXiv:2211.12827","author":"Xing Zhenghao","year":"2022","unstructured":"Zhenghao Xing, Tianyu Wang, Xiaowei Hu, Haoran Wu, Chi-Wing Fu, and Pheng-Ann Heng. 2022. Video Instance Shadow Detection. arXiv preprint arXiv:2211.12827 (2022)."},{"key":"e_1_3_2_1_61_1","volume-title":"AUFormer: Vision Transformers are Parameter-Efficient Facial Action Unit Detectors. arXiv preprint arXiv:2403.04697","author":"Yuan Kaishen","year":"2024","unstructured":"Kaishen Yuan, Zitong Yu, Xin Liu, Weicheng Xie, Huanjing Yue, and Jingyu Yang. 2024. AUFormer: Vision Transformers are Parameter-Efficient Facial Action Unit Detectors. arXiv preprint arXiv:2403.04697 (2024)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681236"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540209"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_8"},{"key":"e_1_3_2_1_65_1","volume-title":"Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159","author":"Zhu Xizhou","year":"2020","unstructured":"Xizhou Zhu, Weijie Su, Lewei Lu, Bin Li, Xiaogang Wang, and Jifeng Dai. 2020. Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547904"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681192","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681192","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681192"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":66,"alternative-id":["10.1145\/3664647.3681192","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681192","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}