{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T16:01:32Z","timestamp":1774022492740,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","funder":[{"name":"Ningbo Science and Technology Bureau","award":["2024Z291"],"award-info":[{"award-number":["2024Z291"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730642","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:42:43Z","timestamp":1753260163000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Generative Video Matting"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1265-3204","authenticated-orcid":false,"given":"Yongtao","family":"Ge","sequence":"first","affiliation":[{"name":"The University of Adelaide, Adelaide, Australia and Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1048-1274","authenticated-orcid":false,"given":"Kangyang","family":"Xie","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1209-9533","authenticated-orcid":false,"given":"Guangkai","family":"Xu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2350-8224","authenticated-orcid":false,"given":"Li","family":"Ke","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1379-2846","authenticated-orcid":false,"given":"Mingyu","family":"Liu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0517-1592","authenticated-orcid":false,"given":"Longtao","family":"Huang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2093-2839","authenticated-orcid":false,"given":"Hui","family":"Xue","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4417-614X","authenticated-orcid":false,"given":"Hao","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8648-8718","authenticated-orcid":false,"given":"Chunhua","family":"Shen","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology, Hangzhou, China and Zhejiang University, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_3_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2004.1315061"},{"key":"e_1_3_3_3_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00843"},{"key":"e_1_3_3_3_4_1","unstructured":"Andreas Blattmann Tim Dockhorn Sumith Kulal Daniel Mendelevitch Maciej Kilian Dominik Lorenz Yam Levi Zion English Vikram Voleti Adam Letts Varun Jampani and Robin Rombach. 2023. Stable Video Diffusion: Scaling Latent Video Diffusion Models to Large Datasets. arXiv: Comp. Res. Repository abs\/2311.15127 (2023)."},{"key":"e_1_3_3_3_5_1","unstructured":"Tim Brooks Bill Peebles Connor Holmes Will DePue Yufei Guo Li Jing David Schnurr Joe Taylor Troy Luhman Eric Luhman Clarence Ng Ricky Wang and Aditya Ramesh. 2024. Video generation models as world simulators. https:\/\/openai.com\/research\/video-generation-models-as-world-simulators."},{"key":"e_1_3_3_3_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33783-3_39"},{"key":"e_1_3_3_3_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/566570.566572"},{"key":"e_1_3_3_3_8_1","volume-title":"Blender - a 3D modelling and rendering package","author":"Community Blender\u00a0Online","year":"2018","unstructured":"Blender\u00a0Online Community. 2018. Blender - a 3D modelling and rendering package. Blender Foundation, Stichting Blender Foundation, Amsterdam. http:\/\/www.blender.org"},{"key":"e_1_3_3_3_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01141"},{"key":"e_1_3_3_3_10_1","unstructured":"Epic Games. 2022. Unreal Engine 5. https:\/\/www.unrealengine.com."},{"key":"e_1_3_3_3_11_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.29.99"},{"key":"e_1_3_3_3_12_1","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Esser Patrick","year":"2024","unstructured":"Patrick Esser, Sumith Kulal, Andreas Blattmann, Rahim Entezari, Jonas M\u00fcller, Harry Saini, Yam Levi, Dominik Lorenz, Axel Sauer, Frederic Boesel, et\u00a0al. 2024. Scaling rectified flow transformers for high-resolution image synthesis. In Proc. Int. Conf. Mach. Learn."},{"key":"e_1_3_3_3_13_1","unstructured":"Edward\u00a0J Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv: Comp. Res. Repository (2021)."},{"key":"e_1_3_3_3_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00371"},{"key":"e_1_3_3_3_15_1","doi-asserted-by":"crossref","unstructured":"Nikita Karaev Ignacio Rocco Benjamin Graham Natalia Neverova Andrea Vedaldi and Christian Rupprecht. 2023. DynamicStereo: Consistent Dynamic Depth from Stereo Videos. Proc. IEEE Conf. Comp. Vis. Patt. Recogn. (2023).","DOI":"10.1109\/CVPR52729.2023.01271"},{"key":"e_1_3_3_3_16_1","volume-title":"Proc. Advances in Neural Inf. Process. Syst.","author":"Karras Tero","year":"2022","unstructured":"Tero Karras, Miika Aittala, Timo Aila, and Samuli Laine. 2022. Elucidating the Design Space of Diffusion-Based Generative Models. In Proc. Advances in Neural Inf. Process. Syst."},{"key":"e_1_3_3_3_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19999"},{"key":"e_1_3_3_3_18_1","doi-asserted-by":"crossref","unstructured":"Rawal Khirodkar Timur Bagautdinov Julieta Martinez Su Zhaoen Austin James Peter Selednik Stuart Anderson and Shunsuke Saito. 2024. Sapiens: Foundation for Human Vision Models. arXiv: Comp. Res. Repository 2408.12569 (2024).","DOI":"10.1007\/978-3-031-73235-5_12"},{"key":"e_1_3_3_3_19_1","unstructured":"Black\u00a0Forest Labs. 2023. FLUX. https:\/\/github.com\/black-forest-labs\/flux."},{"key":"e_1_3_3_3_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.447"},{"key":"e_1_3_3_3_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475512"},{"key":"e_1_3_3_3_22_1","doi-asserted-by":"crossref","unstructured":"Jizhizi Li Jing Zhang Stephen\u00a0J Maybank and Dacheng Tao. 2022. Bridging composite and real: towards end-to-end deep image matting. Int. J. Comput. Vision 130 2 (2022) 246\u2013266.","DOI":"10.1007\/s11263-021-01541-0"},{"key":"e_1_3_3_3_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02145"},{"key":"e_1_3_3_3_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00865"},{"key":"e_1_3_3_3_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00319"},{"key":"e_1_3_3_3_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_3_27_1","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lipman Yaron","year":"2023","unstructured":"Yaron Lipman, Ricky T.\u00a0Q. Chen, Heli Ben-Hamu, Maximilian Nickel, and Matthew Le. 2023. Flow Matching for Generative Modeling. In Proc. Int. Conf. Learn. Representations."},{"key":"e_1_3_3_3_28_1","volume-title":"Proc. IEEE Int. Conf. Comp. Vis.","author":"Lu Hao","year":"2019","unstructured":"Hao Lu, Yutong Dai, Chunhua Shen, and Songcen Xu. 2019. Context-Aware Image Matting for Simultaneous Foreground and Alpha Estimation. In Proc. IEEE Int. Conf. Comp. Vis."},{"key":"e_1_3_3_3_29_1","doi-asserted-by":"crossref","unstructured":"Sihan Ma Jizhizi Li Jing Zhang He Zhang and Dacheng Tao. 2023. Rethinking Portrait Matting with Pirvacy Preserving. Int. J. Comput. Vision (2023) 2172\u20132197.","DOI":"10.1007\/s11263-023-01797-8"},{"key":"e_1_3_3_3_30_1","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Podell Dustin","year":"2024","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2024. SDXL: improving latent diffusion models for high-resolution image synthesis. In Proc. Int. Conf. Learn. Representations."},{"key":"e_1_3_3_3_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01369"},{"key":"e_1_3_3_3_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01369"},{"key":"e_1_3_3_3_33_1","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In Proc. Int. Conf. Mach. Learn."},{"key":"e_1_3_3_3_34_1","unstructured":"Nikhila Ravi Valentin Gabeur Yuan-Ting Hu Ronghang Hu Chaitanya Ryali Tengyu Ma Haitham Khedr Roman R\u00e4dle Chloe Rolland Laura Gustafson Eric Mintun Junting Pan Kalyan\u00a0Vasudev Alwala Nicolas Carion Chao-Yuan Wu Ross Girshick Piotr Doll\u00e1r and Christoph Feichtenhofer. 2024. SAM 2: Segment Anything in Images and Videos. arXiv: Comp. Res. Repository 2408.00714 (2024)."},{"key":"e_1_3_3_3_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206503"},{"key":"e_1_3_3_3_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_3_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_3_3_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00236"},{"key":"e_1_3_3_3_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19818-2_25"},{"key":"e_1_3_3_3_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-37431-9_34"},{"key":"e_1_3_3_3_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01356"},{"key":"e_1_3_3_3_42_1","unstructured":"supervisely ecosystem. 2018. Supervisely person dataset. https:\/\/github.com\/supervisely-ecosystem\/persons."},{"key":"e_1_3_3_3_43_1","doi-asserted-by":"crossref","unstructured":"Luyuan Wang Hanyuan Zhang Qinjie Xiao Hao Xu Chunhua Shen and Xiaogang Jin. 2022. Effective Eyebrow Matting with Domain Adaptation. Computer Graphics Forum 41 7 (2022) 347\u2013358.","DOI":"10.1111\/cgf.14682"},{"key":"e_1_3_3_3_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657519"},{"key":"e_1_3_3_3_45_1","volume-title":"Proc. IEEE Conf. Comp. Vis. Patt. Recogn.","author":"Xu Ning","year":"2017","unstructured":"Ning Xu, Brian Price, Scott Cohen, and Thomas Huang. 2017. Designing effective inter-pixel information flow for natural image matting. In Proc. IEEE Conf. Comp. Vis. Patt. Recogn."},{"key":"e_1_3_3_3_46_1","unstructured":"Zhuoyi Yang Jiayan Teng Wendi Zheng Ming Ding Shiyu Huang Jiazheng Xu Yuanming Yang Wenyi Hong Xiaohan Zhang Guanyu Feng et\u00a0al. [n. d.]. CogVideoX: Text-to-Video Diffusion Models with An Expert Transformer. arXiv: Comp. Res. Repository 2408.06072 ([n. d.])."},{"key":"e_1_3_3_3_47_1","doi-asserted-by":"crossref","unstructured":"Lvmin Zhang and Maneesh Agrawala. 2024. Transparent Image Layer Diffusion using Latent Transparency. ACM Trans. Graph. (2024).","DOI":"10.1145\/3658150"},{"key":"e_1_3_3_3_48_1","volume-title":"Proc. Advances in Neural Inf. Process. Syst.","author":"Zhao Sijie","year":"2024","unstructured":"Sijie Zhao, Yong Zhang, Xiaodong Cun, Shaoshu Yang, Muyao Niu, Xiaoyu Li, Wenbo Hu, and Ying Shan. 2024. CV-VAE: A Compatible Video VAE for Latent Generative Video Models. In Proc. Advances in Neural Inf. Process. Syst."},{"key":"e_1_3_3_3_49_1","doi-asserted-by":"crossref","unstructured":"Dongqing Zou Xiaowu Chen Guangying Cao and Xiaogang Wang. 2019. Unsupervised video matting via sparse and low-rank representation. IEEE Trans. Pattern Anal. Mach. Intell. 42 6 (2019) 1501\u20131514.","DOI":"10.1109\/TPAMI.2019.2895331"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730642","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:04:38Z","timestamp":1774019078000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730642"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":48,"alternative-id":["10.1145\/3721238.3730642","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730642","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}