{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:21:32Z","timestamp":1775067692077,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":80,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Guangzhou Industrial Information and Intelligent Key Laboratory Project","award":["No. 2024A03J0628"],"award-info":[{"award-number":["No. 2024A03J0628"]}]},{"name":"Nansha Key Area Science and Technology Project","award":["No. 2023ZD003"],"award-info":[{"award-number":["No. 2023ZD003"]}]},{"name":"Guangzhou-HKUST(GZ) Joint Funding Program","award":["No. 2024A03J0618,No. 2023A03J0671"],"award-info":[{"award-number":["No. 2024A03J0618,No. 2023A03J0671"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681236","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"166-175","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["Timeline and Boundary Guided Diffusion Network for Video Shadow Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5398-7847","authenticated-orcid":false,"given":"Haipeng","family":"Zhou","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9726-4253","authenticated-orcid":false,"given":"Hongqiu","family":"Wang","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8255-2997","authenticated-orcid":false,"given":"Tian","family":"Ye","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2502-3578","authenticated-orcid":false,"given":"Zhaohu","family":"Xing","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9405-8232","authenticated-orcid":false,"given":"Jun","family":"Ma","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou) &amp; The Hong Kong University of Science and Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1503-0240","authenticated-orcid":false,"given":"Ping","family":"Li","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0835-3770","authenticated-orcid":false,"given":"Qiong","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen Institutes of Advanced Technology, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3871-663X","authenticated-orcid":false,"given":"Lei","family":"Zhu","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou) &amp; The Hong Kong University of Science and Technology, Guangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02171"},{"key":"e_1_3_2_1_2_1","volume-title":"Label-Efficient Semantic Segmentation with Diffusion Models. In International Conference on Learning Representations.","author":"Baranchuk Dmitry","year":"2021","unstructured":"Dmitry Baranchuk, Andrey Voynov, Ivan Rubachev, Valentin Khrulkov, and Artem Babenko. 2021. Label-Efficient Semantic Segmentation with Diffusion Models. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 4413--4421","author":"Berman Maxim","year":"2018","unstructured":"Maxim Berman, Amal Rannen Triki, and Matthew B Blaschko. 2018. The lov\u00e1sz-softmax loss: A tractable surrogate for the optimization of the intersection-over-union measure in neural networks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 4413--4421."},{"key":"e_1_3_2_1_4_1","first-page":"15309","article-title":"Retrieval-augmented diffusion models","volume":"35","author":"Blattmann Andreas","year":"2022","unstructured":"Andreas Blattmann, Robin Rombach, Kaan Oktay, Jonas M\u00fcller, and Bj\u00f6rn Ommer. 2022. Retrieval-augmented diffusion models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 15309--15324.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01816"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00090"},{"key":"e_1_3_2_1_8_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Chen Ting","year":"2022","unstructured":"Ting Chen, Ruixiang Zhang, and Geoffrey Hinton. 2022. Analog Bits: Generating Discrete Data using Diffusion Models with Self-Conditioning. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00725"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00274"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00565"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_37"},{"key":"e_1_3_2_1_13_1","first-page":"11781","article-title":"Rethinking space-time networks with improved memory coverage for efficient video object segmentation","volume":"34","author":"Cheng Ho Kei","year":"2021","unstructured":"Ho Kei Cheng, Yu-Wing Tai, and Chi-Keung Tang. 2021. Rethinking space-time networks with improved memory coverage for efficient video object segmentation. Advances in Neural Information Processing Systems, Vol. 34 (2021), 11781--11794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612482"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2003.1233909"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_42"},{"key":"e_1_3_2_1_17_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02467"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01199"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.249"},{"key":"e_1_3_2_1_21_1","first-page":"18100","article-title":"Card: Classification and regression diffusion models","volume":"35","author":"Han Xizewen","year":"2022","unstructured":"Xizewen Han, Huangjie Zheng, and Mingyuan Zhou. 2022. Card: Classification and regression diffusion models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 18100--18115.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_23_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_24_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840--6851."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3049331"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3047977"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01987"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20011"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_43"},{"key":"e_1_3_2_1_30_1","first-page":"3430","article-title":"Video object segmentation with adaptive feature bank and uncertain-region refinement","volume":"33","author":"Liang Yongqing","year":"2020","unstructured":"Yongqing Liang, Xin Li, Navid Jafari, and Jim Chen. 2020. Video object segmentation with adaptive feature bank and uncertain-region refinement. Advances in Neural Information Processing Systems, Vol. 33 (2020), 3430--3441.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01007"},{"key":"e_1_3_2_1_33_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_34_1","first-page":"1","article-title":"Dual-branch network for cloud and cloud shadow segmentation","volume":"60","author":"Lu Chen","year":"2022","unstructured":"Chen Lu, Min Xia, Ming Qian, and Binyu Chen. 2022. Dual-branch network for cloud and cloud shadow segmentation. IEEE Transactions on Geoscience and Remote Sensing, Vol. 60 (2022), 1--12.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00312"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00374"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2004.51"},{"key":"e_1_3_2_1_38_1","volume-title":"A boundary-aware network for shadow removal","author":"Niu Kunpeng","year":"2022","unstructured":"Kunpeng Niu, Yanli Liu, Enhua Wu, and Guanyu Xing. 2022. A boundary-aware network for shadow removal. IEEE Transactions on Multimedia (2022)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00932"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19830-4_34"},{"key":"e_1_3_2_1_42_1","volume-title":"UltraPixel: Advancing Ultra-High-Resolution Image Synthesis to New Peaks. arXiv preprint arXiv:2407.02158","author":"Ren Jingjing","year":"2024","unstructured":"Jingjing Ren, Wenbo Li, Haoyu Chen, Renjing Pei, Bin Shao, Yong Guo, Long Peng, Fenglong Song, and Lei Zhu. 2024. UltraPixel: Advancing Ultra-High-Resolution Image Synthesis to New Peaks. arXiv preprint arXiv:2407.02158 (2024)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01520"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.74"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_44"},{"key":"e_1_3_2_1_48_1","volume-title":"Denoising Diffusion Implicit Models. In International Conference on Learning Representations.","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising Diffusion Implicit Models. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_49_1","volume-title":"International Conference on Learning Representations.","author":"Song Yang","year":"2020","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. 2020. Score-Based Generative Modeling through Stochastic Differential Equations. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00070"},{"key":"e_1_3_2_1_51_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_49"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00971"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00135"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160647"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681192"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3426953"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00192"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32239-7_12"},{"key":"e_1_3_2_1_60_1","volume-title":"Detect any shadow: Segment anything for video shadow detection","author":"Wang Yonghui","year":"2023","unstructured":"Yonghui Wang, Wengang Zhou, Yunyao Mao, and Houqiang Li. 2023. Detect any shadow: Segment anything for video shadow detection. IEEE Transactions on Circuits and Systems for Video Technology (2023)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612001"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680916"},{"key":"e_1_3_2_1_63_1","volume-title":"Online Unsupervised Video Object Segmentation via Contrastive Motion Clustering","author":"Xi Lin","year":"2023","unstructured":"Lin Xi, Weihai Chen, Xingming Wu, Zhong Liu, and Zhengguo Li. 2023. Online Unsupervised Video Object Segmentation via Contrastive Motion Clustering. IEEE Transactions on Circuits and Systems for Video Technology (2023)."},{"key":"e_1_3_2_1_64_1","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume":"34","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M Alvarez, and Ping Luo. 2021. SegFormer: Simple and efficient design for semantic segmentation with transformers. Advances in Neural Information Processing Systems, Vol. 34 (2021), 12077--12090.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_65_1","volume-title":"Diff-unet: A diffusion embedded network for volumetric segmentation. arXiv preprint arXiv:2303.10326","author":"Xing Zhaohu","year":"2023","unstructured":"Zhaohu Xing, Liang Wan, Huazhu Fu, Guang Yang, and Lei Zhu. 2023. Diff-unet: A diffusion embedded network for volumetric segmentation. arXiv preprint arXiv:2303.10326 (2023)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16443-9_14"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2024.3360239"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109969"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01165"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01214"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43987-2_10"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02419"},{"key":"e_1_3_2_1_73_1","volume-title":"Vivim: a Video Vision Mamba for Medical Video Object Segmentation. arXiv preprint arXiv:2401.14168","author":"Yang Yijun","year":"2024","unstructured":"Yijun Yang, Zhaohu Xing, and Lei Zhu. 2024. Vivim: a Video Vision Mamba for Medical Video Object Segmentation. arXiv preprint arXiv:2401.14168 (2024)."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00244"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611742"},{"key":"e_1_3_2_1_76_1","volume-title":"Planeseg: Building a plug-in for boosting planar region segmentation","author":"Zhang Zhicheng","year":"2023","unstructured":"Zhicheng Zhang, Song Chen, Zichuan Wang, and Jufeng Yang. 2023. Planeseg: Building a plug-in for boosting planar region segmentation. IEEE Transactions on Neural Networks and Learning Systems (2023)."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01827"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2023.3274255"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00531"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_8"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681236","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681236","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:03Z","timestamp":1750295883000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681236"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":80,"alternative-id":["10.1145\/3664647.3681236","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681236","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}