{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:18:53Z","timestamp":1776107933024,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":92,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 62322210"],"award-info":[{"award-number":["No. 62322210"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100009592","name":"Beijing Municipal Science and Technology Commission","doi-asserted-by":"publisher","award":["No. Z231100005923031"],"award-info":[{"award-number":["No. Z231100005923031"]}],"id":[{"id":"10.13039\/501100009592","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Innovation Funding of ICT, CAS","award":["No. E461020"],"award-info":[{"award-number":["No. E461020"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730623","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:42:43Z","timestamp":1753260163000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Sketch3DVE: Sketch-based 3D-Aware Scene Video Editing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-4736-8100","authenticated-orcid":false,"given":"Feng-Lin","family":"Liu","sequence":"first","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8271-2104","authenticated-orcid":false,"given":"Shi-Yang","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0416-4374","authenticated-orcid":false,"given":"Yan-Pei","family":"Cao","sequence":"additional","affiliation":[{"name":"VAST, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0284-726X","authenticated-orcid":false,"given":"Hongbo","family":"Fu","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, HongKong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1021-8148","authenticated-orcid":false,"given":"Lin","family":"Gao","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Alibaba. 2024. https:\/\/github.com\/alimama-creative\/FLUX-Controlnet-Inpainting"},{"key":"e_1_3_3_2_3_1","unstructured":"Sherwin Bahmani Ivan Skorokhodov Aliaksandr Siarohin Willi Menapace Guocheng Qian Michael Vasilkovsky Hsin-Ying Lee Chaoyang Wang Jiaxu Zou Andrea Tagliasacchi David\u00a0B. Lindell and Sergey Tulyakov. 2024. VD3D: Taming Large Video Diffusion Transformers for 3D Camera Control. CoRR abs\/2407.12781 (2024)."},{"key":"e_1_3_3_2_4_1","unstructured":"Jianhong Bai Menghan Xia Xintao Wang Ziyang Yuan Xiao Fu Zuozhu Liu Haoji Hu Pengfei Wan and Di Zhang. 2024. SynCamMaster: Synchronizing Multi-Camera Video Generation from Diverse Viewpoints. CoRR abs\/2412.07760 (2024)."},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"crossref","unstructured":"Paul\u00a0J. Besl and Neil\u00a0D. McKay. 1992. A Method for Registration of 3-D Shapes. IEEE Trans. Pattern Anal. Mach. Intell. 14 2 (1992) 239\u2013256.","DOI":"10.1109\/34.121791"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"crossref","unstructured":"Kirill Brodt and Mikhail Bessmeltsev. 2022. Sketch2Pose: estimating a 3D character pose from a bitmap sketch. ACM Trans. Graph. 41 4 (2022) 85:1\u201385:15.","DOI":"10.1145\/3528223.3530106"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02121"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"crossref","unstructured":"Shu-Yu Chen Wanchao Su Lin Gao Shihong Xia and Hongbo Fu. 2020. DeepFaceDrawing: deep generation of face images from sketches. ACM Trans. Graph. 39 4 (2020) 72.","DOI":"10.1145\/3386569.3392386"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00522"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"crossref","unstructured":"Chenjian Gao Xilin Wang Qian Yu Lu Sheng Jing Zhang Xiaoguang Han Yi-Zhe Song and Dong Xu. 2024. 3D Reconstruction From a Single Sketch via View-Dependent Depth Sampling. IEEE Trans. Pattern Anal. Mach. Intell. 46 12 (2024) 9661\u20139676.","DOI":"10.1109\/TPAMI.2024.3424404"},{"key":"e_1_3_3_2_11_1","unstructured":"Zekai Gu Rui Yan Jiahao Lu Peng Li Zhiyang Dou Chenyang Si Zhen Dong Qifeng Liu Cheng Lin Ziwei Liu Wenping Wang and Yuan Liu. 2025. Diffusion as Shader: 3D-aware Video Diffusion for Versatile Video Generation Control. CoRR abs\/2501.03847 (2025)."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"crossref","unstructured":"Xiaoguang Han Chang Gao and Yizhou Yu. 2017. DeepSketch2Face: a deep learning based sketching system for 3D face and caricature modeling. ACM Trans. Graph. 36 4 (2017) 126:1\u2013126:12.","DOI":"10.1145\/3072959.3073629"},{"key":"e_1_3_3_2_13_1","unstructured":"Hao He Yinghao Xu Yuwei Guo Gordon Wetzstein Bo Dai Hongsheng Li and Ceyuan Yang. 2024. CameraCtrl: Enabling Camera Control for Text-to-Video Generation. CoRR abs\/2404.02101 (2024)."},{"key":"e_1_3_3_2_14_1","unstructured":"Chen Hou Guoqiang Wei Yan Zeng and Zhibo Chen. 2024. Training-free Camera Control for Video Generation. CoRR abs\/2406.10126 (2024)."},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"crossref","unstructured":"Zhitong Huang Mohan Zhang and Jing Liao. 2024. LVCD: Reference-based Lineart Video Colorization with Diffusion Models. ACM Trans. Graph. 43 6 (2024) 177:1\u2013177:11.","DOI":"10.1145\/3687910"},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"crossref","unstructured":"Ondrej Jamriska S\u00e1rka Sochorov\u00e1 Ondrej Texler Michal Luk\u00e1c Jakub Fiser Jingwan Lu Eli Shechtman and Daniel S\u00fdkora. 2019. Stylizing video by example. ACM Trans. Graph. 38 4 (2019) 107:1\u2013107:11.","DOI":"10.1145\/3306346.3323006"},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"crossref","unstructured":"Lifan Jiang Shuang Chen Boxi Wu Xiaotong Guan and Jiahui Zhang. 2025. VidSketch: Hand-drawn Sketch-Driven Video Generation with Diffusion Control. CoRR abs\/2502.01101 (2025).","DOI":"10.2139\/ssrn.5279103"},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00183"},{"key":"e_1_3_3_2_20_1","first-page":"150","volume-title":"Eur. Conf. Comput. Vis.","author":"Ju Xuan","year":"2024","unstructured":"Xuan Ju, Xian Liu, Xintao Wang, Yuxuan Bian, Ying Shan, and Qiang Xu. 2024. BrushNet: A Plug-and-Play Image Inpainting Model with Decomposed Dual-Branch Diffusion. In Eur. Conf. Comput. Vis. , Vol.\u00a015078. 150\u2013168."},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"crossref","unstructured":"Yoni Kasten Dolev Ofri Oliver Wang and Tali Dekel. 2021. Layered Neural Atlases for Consistent Video Editing. ACM Trans. Graph. 40 6 (2021) 210:1\u2013210:12.","DOI":"10.1145\/3478513.3480546"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"crossref","unstructured":"Bernhard Kerbl Georgios Kopanas Thomas Leimk\u00fchler and George Drettakis. 2023. 3D Gaussian Splatting for Real-Time Radiance Field Rendering. ACM Trans. Graph. 42 4 (2023) 139:1\u2013139:14.","DOI":"10.1145\/3592433"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_3_2_24_1","unstructured":"Max Ku Cong Wei Weiming Ren Harry Yang and Wenhu Chen. 2024. AnyV2V: A Plug-and-Play Framework For Any Video-to-Video Editing Tasks. CoRR abs\/2403.14468 (2024)."},{"key":"e_1_3_3_2_25_1","unstructured":"Kuaishou. 2024. https:\/\/kling.kuaishou.com"},{"key":"e_1_3_3_2_26_1","volume-title":"Open-Sora-Plan","author":"Lab PKU-Yuan","year":"2024","unstructured":"PKU-Yuan Lab and Tuzhan\u00a0AI etc.2024. Open-Sora-Plan. https:\/\/doi.org\/10.5281\/zenodo.10948109"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"crossref","unstructured":"Jinyu Li Xiaokun Pan Gan Huang Ziyang Zhang Nan Wang Hujun Bao and Guofeng Zhang. 2024. RD-VIO: Robust Visual-Inertial Odometry for Mobile Augmented Reality in Dynamic Environments. IEEE Trans. Vis. Comput. Graph. 30 10 (2024) 6941\u20136955.","DOI":"10.1109\/TVCG.2024.3353263"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"crossref","unstructured":"Xiaoyu Li Bo Zhang Jing Liao and Pedro\u00a0V. Sander. 2022. Deep Sketch-Guided Cartoon Video Inbetweening. IEEE Trans. Vis. Comput. Graph. 28 8 (2022) 2938\u20132952.","DOI":"10.1109\/TVCG.2021.3049419"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02092"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"crossref","unstructured":"Feng-Lin Liu Shu-Yu Chen Yu-Kun Lai Chunpeng Li Yue-Ren Jiang Hongbo Fu and Lin Gao. 2022. DeepFaceVideoEditing: sketch-based deep editing of face videos. ACM Trans. Graph. 41 4 (2022) 167:1\u2013167:16.","DOI":"10.1145\/3528223.3530056"},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"crossref","unstructured":"Feng-Lin Liu Hongbo Fu Yu-Kun Lai and Lin Gao. 2024a. SketchDream: Sketch-based Text-To-3D Generation and Editing. ACM Trans. Graph. 43 4 (2024) 44:1\u201344:13.","DOI":"10.1145\/3658120"},{"key":"e_1_3_3_2_32_1","volume-title":"Adv. Neural Inform. Process. Syst.","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong\u00a0Jae Lee. 2023a. Visual Instruction Tuning. In Adv. Neural Inform. Process. Syst."},{"key":"e_1_3_3_2_33_1","unstructured":"Kunhao Liu Ling Shao and Shijian Lu. 2024c. Novel View Extrapolation with Video Diffusion Priors. CoRR abs\/2411.14208 (2024)."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00821"},{"key":"e_1_3_3_2_36_1","unstructured":"Zhiheng Liu Hao Ouyang Qiuyu Wang Ka\u00a0Leong Cheng Jie Xiao Kai Zhu Nan Xue Yu Liu Yujun Shen and Yang Cao. 2024b. InFusion: Inpainting 3D Gaussians via Learning Depth Completion from Diffusion Prior. CoRR abs\/2404.11613 (2024)."},{"key":"e_1_3_3_2_37_1","unstructured":"Zichen Liu Yue Yu Hao Ouyang Qiuyu Wang Ka\u00a0Leong Cheng Wen Wang Zhiheng Liu Qifeng Chen and Yujun Shen. 2024d. MagicQuill: An Intelligent Interactive Image Editing System. CoRR abs\/2411.09703 (2024)."},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"crossref","unstructured":"Fei Luo Yongqiong Zhu Yanping Fu Huajian Zhou Zezheng Chen and Chunxia Xiao. 2023. Sparse RGB-D images create a real thing: A flexible voxel based 3D reconstruction pipeline for single object. Vis. Informatics 7 1 (2023) 66\u201376.","DOI":"10.1016\/j.visinf.2022.12.002"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"crossref","unstructured":"Hao Ma Jingyuan Yang and Hui Huang. 2024. Taming diffusion model for exemplar-based image translation. Computational Visual Media 10 6 (2024) 1031\u20131043.","DOI":"10.1007\/s41095-023-0371-3"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01343"},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"crossref","unstructured":"Ben Mildenhall Pratul\u00a0P. Srinivasan Matthew Tancik Jonathan\u00a0T. Barron Ravi Ramamoorthi and Ren Ng. 2022. NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65 1 (2022) 99\u2013106.","DOI":"10.1145\/3503250"},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01980"},{"key":"e_1_3_3_2_43_1","unstructured":"Chong Mou Mingdeng Cao Xintao Wang Zhaoyang Zhang Ying Shan and Jian Zhang. 2024a. ReVideo: Remake a Video with Motion and Content Control. CoRR abs\/2405.13865 (2024)."},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_3_2_45_1","unstructured":"OpenAI. 2024. Sora Overview:https:\/\/openai.com\/index\/sora\/. https:\/\/openai.com\/index\/sora\/"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00773"},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687656"},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"crossref","unstructured":"Tiziano Portenier Qiyang Hu Attila Szab\u00f3 Siavash\u00a0Arjomand Bigdeli Paolo Favaro and Matthias Zwicker. 2018. Faceshop: deep sketch-based face image editing. ACM Trans. Graph. 37 4 (2018) 99.","DOI":"10.1145\/3197517.3201393"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01460"},{"key":"e_1_3_3_2_50_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision(Proceedings of Machine Learning Research Vol.\u00a0139). 8748\u20138763."},{"key":"e_1_3_3_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"crossref","unstructured":"Manuel Ruder Alexey Dosovitskiy and Thomas Brox. 2018. Artistic Style Transfer for Videos and Spherical Images. Int. J. Comput. Vis. 126 11 (2018) 1199\u20131219.","DOI":"10.1007\/s11263-018-1089-z"},{"key":"e_1_3_3_2_53_1","unstructured":"runway. 2024. https:\/\/runwayml.com\/research\/introducing-gen-3-alpha"},{"key":"e_1_3_3_2_54_1","volume-title":"Adv. Neural Inform. Process. Syst.","author":"Seo Junyoung","year":"2024","unstructured":"Junyoung Seo, Kazumi Fukuda, Takashi Shibuya, Takuya Narihira, Naoki Murata, Shoukang Hu, Chieh-Hsin Lai, Seungryong Kim, and Yuki Mitsufuji. 2024. GenWarp: Single Image to Novel Views with Semantic-Preserving Generative Warping. In Adv. Neural Inform. Process. Syst."},{"key":"e_1_3_3_2_55_1","first-page":"19313","volume-title":"Adv. Neural Inform. Process. Syst.","author":"Sitzmann Vincent","year":"2021","unstructured":"Vincent Sitzmann, Semon Rezchikov, Bill Freeman, Josh Tenenbaum, and Fr\u00e9do Durand. 2021. Light Field Networks: Neural Scene Representations with Single-Evaluation Rendering. In Adv. Neural Inform. Process. Syst.19313\u201319325."},{"key":"e_1_3_3_2_56_1","unstructured":"StabilityAI. 2024. https:\/\/www.stablevideo.com\/"},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"crossref","unstructured":"Jia-Mu Sun Tong Wu and Lin Gao. 2024b. Recent advances in implicit representation-based 3D shape generation. Vis. Intell. 2 1 (2024).","DOI":"10.1007\/s44267-024-00042-1"},{"key":"e_1_3_3_2_58_1","unstructured":"Wenqiang Sun Shuo Chen Fangfu Liu Zilong Chen Yueqi Duan Jun Zhang and Yikai Wang. 2024a. DimensionX: Create Any 3D and 4D Scenes from a Single Image with Controllable Video Diffusion. CoRR abs\/2411.04928 (2024)."},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"crossref","unstructured":"Zhenggang Tang Yuchen Fan Dilin Wang Hongyu Xu Rakesh Ranjan Alexander\u00a0G. Schwing and Zhicheng Yan. 2024. MV-DUSt3R+: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds. CoRR abs\/2412.06974 (2024).","DOI":"10.1109\/CVPR52734.2025.00498"},{"key":"e_1_3_3_2_60_1","volume-title":"Vchitect-2.0: Parallel Transformer for Scaling Up Video Diffusion Models","author":"Team Vchitect","year":"2024","unstructured":"Vchitect Team and Shanghai Artificial\u00a0Intelligence Laboratory. 2024. Vchitect-2.0: Parallel Transformer for Scaling Up Video Diffusion Models. https:\/\/github.com\/Vchitect\/Vchitect-2.0"},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555382"},{"key":"e_1_3_3_2_62_1","doi-asserted-by":"crossref","unstructured":"Lili Wang Qinglin Qi Yi Chen Wei Ke and Aimin Hao. 2014. Interactive texture design and synthesis from mesh sketches. Frontiers Comput. Sci. 8 2 (2014) 330\u2013341.","DOI":"10.1007\/s11704-014-3285-5"},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"crossref","unstructured":"Qinghe Wang Yawen Luo Xiaoyu Shi Xu Jia Huchuan Lu Tianfan Xue Xintao Wang Pengfei Wan Di Zhang and Kun Gai. 2025. CineMaster: A 3D-Aware and Controllable Framework for Cinematic Text-to-Video Generation. CoRR abs\/2502.08639 (2025).","DOI":"10.1145\/3721238.3730755"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01956"},{"key":"e_1_3_3_2_65_1","unstructured":"Wen Wang Kangyang Xie Zide Liu Hao Chen Yue Cao Xinlong Wang and Chunhua Shen. 2023. Zero-Shot Video Editing Using Off-The-Shelf Image Diffusion Models. CoRR abs\/2303.17599 (2023)."},{"key":"e_1_3_3_2_66_1","unstructured":"Yuelei Wang Jian Zhang Peng-Tao Jiang Hao Zhang Jinwei Chen and Bo Li. 2024c. CPA: Camera-pose-awareness Diffusion Transformer for Video Generation. CoRR abs\/2412.01429 (2024)."},{"key":"e_1_3_3_2_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657518"},{"key":"e_1_3_3_2_68_1","unstructured":"Shuchen Weng Haojie Zheng Peixuan Zhang Yuchen Hong Han Jiang Si Li and Boxin Shi. 2024. VIRES: Video Instance Repainting with Sketch and Text Guidance. CoRR abs\/2411.16199 (2024)."},{"key":"e_1_3_3_2_69_1","unstructured":"Rundi Wu Ruiqi Gao Ben Poole Alex Trevithick Changxi Zheng Jonathan\u00a0T. Barron and Aleksander Holynski. 2024. CAT4D: Create Anything in 4D with Multi-View Video Diffusion Models. CoRR abs\/2411.18613 (2024)."},{"key":"e_1_3_3_2_70_1","doi-asserted-by":"crossref","unstructured":"Haoran Xie Keisuke Arihara Syuhei Sato and Kazunori Miyata. 2024b. DualSmoke: Sketch-based smoke illustration design with two-stage generative model. Computational Visual Media 10 5 (2024) 965\u2013979.","DOI":"10.1007\/s41095-022-0318-0"},{"key":"e_1_3_3_2_71_1","unstructured":"Tianhao Xie Noam Aigerman Eugene Belilovsky and Tiberiu Popa. 2024a. Sketch-guided Cage-based 3D Gaussian Splatting Deformation. CoRR abs\/2411.12168 (2024)."},{"key":"e_1_3_3_2_72_1","doi-asserted-by":"crossref","unstructured":"Jinbo Xing Hanyuan Liu Menghan Xia Yong Zhang Xintao Wang Ying Shan and Tien-Tsin Wong. 2024. ToonCrafter: Generative Cartoon Interpolation. ACM Trans. Graph. 43 6 (2024) 245:1\u2013245:11.","DOI":"10.1145\/3687761"},{"key":"e_1_3_3_2_73_1","unstructured":"Dejia Xu Weili Nie Chao Liu Sifei Liu Jan Kautz Zhangyang Wang and Arash Vahdat. 2024. CamCo: Camera-Controllable 3D-Consistent Image-to-Video Generation. CoRR abs\/2406.02509 (2024)."},{"key":"e_1_3_3_2_74_1","doi-asserted-by":"crossref","unstructured":"Yukun Xu Keyang Ye Tianjia Shao and Yanlin Weng. 2025. Animatable 3D Gaussians for modeling dynamic humans. Frontiers Comput. Sci. 19 9 (2025) 199704.","DOI":"10.1007\/s11704-024-40497-5"},{"key":"e_1_3_3_2_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"e_1_3_3_2_76_1","unstructured":"Lihe Yang Bingyi Kang Zilong Huang Zhen Zhao Xiaogang Xu Jiashi Feng and Hengshuang Zhao. 2024b. Depth Anything V2. CoRR abs\/2406.09414 (2024)."},{"key":"e_1_3_3_2_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657481"},{"key":"e_1_3_3_2_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618160"},{"key":"e_1_3_3_2_79_1","unstructured":"Zhuoyi Yang Jiayan Teng Wendi Zheng Ming Ding Shiyu Huang Jiazheng Xu Yuanming Yang Wenyi Hong Xiaohan Zhang Guanyu Feng Da Yin Xiaotao Gu Yuxuan Zhang Weihan Wang Yean Cheng Ting Liu Bin Xu Yuxiao Dong and Jie Tang. 2024c. CogVideoX: Text-to-Video Diffusion Models with An Expert Transformer. CoRR abs\/2408.06072 (2024)."},{"key":"e_1_3_3_2_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00457"},{"key":"e_1_3_3_2_81_1","unstructured":"Wangbo Yu Jinbo Xing Li Yuan Wenbo Hu Xiaoyu Li Zhipeng Huang Xiangjun Gao Tien-Tsin Wong Ying Shan and Yonghong Tian. 2024. ViewCrafter: Taming Video Diffusion Models for High-fidelity Novel View Synthesis. CoRR abs\/2409.02048 (2024)."},{"key":"e_1_3_3_2_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00586"},{"key":"e_1_3_3_2_83_1","unstructured":"Junyi Zhang Charles Herrmann Junhwa Hur Varun Jampani Trevor Darrell Forrester Cole Deqing Sun and Ming-Hsuan Yang. 2024b. MonST3R: A Simple Approach for Estimating Geometry in the Presence of Motion. CoRR abs\/2410.03825 (2024)."},{"key":"e_1_3_3_2_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_2_85_1","unstructured":"Tingyang Zhang Qingzhe Gao Weiyu Li Libin Liu and Baoquan Chen. 2024a. BAGS: Building Animatable Gaussian Splatting from a Monocular Video with Diffusion Priors. CoRR abs\/2403.11427 (2024)."},{"key":"e_1_3_3_2_86_1","unstructured":"Yabo Zhang Xinpeng Zhou Yihan Zeng Hang Xu Hui Li and Wangmeng Zuo. 2025. FramePainter: Endowing Interactive Image Editing with Video Diffusion Priors. CoRR abs\/2501.08225 (2025)."},{"key":"e_1_3_3_2_87_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00684"},{"key":"e_1_3_3_2_88_1","unstructured":"Guangcong Zheng Teng Li Rui Jiang Yehao Lu Tao Wu and Xi Li. 2024a. CamI2V: Camera-Controlled Image-to-Video Diffusion Model. CoRR abs\/2410.15957 (2024)."},{"key":"e_1_3_3_2_89_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680641"},{"key":"e_1_3_3_2_90_1","doi-asserted-by":"crossref","unstructured":"Xin-Yang Zheng Hao Pan Peng-Shuai Wang Xin Tong Yang Liu and Heung-Yeung Shum. 2023. Locally Attentional SDF Diffusion for Controllable 3D Shape Generation. ACM Trans. Graph. 42 4 (2023) 91:1\u201391:13.","DOI":"10.1145\/3592103"},{"key":"e_1_3_3_2_91_1","volume-title":"Open-Sora: Democratizing Efficient Video Production for All","author":"Zheng Zangwei","year":"2024","unstructured":"Zangwei Zheng, Xiangyu Peng, Tianji Yang, Chenhui Shen, Shenggui Li, Hongxin Liu, Yukun Zhou, Tianyi Li, and Yang You. 2024b. Open-Sora: Democratizing Efficient Video Production for All. https:\/\/github.com\/hpcaitech\/Open-Sora"},{"key":"e_1_3_3_2_92_1","doi-asserted-by":"crossref","unstructured":"Tinghui Zhou Richard Tucker John Flynn Graham Fyffe and Noah Snavely. 2018. Stereo magnification: learning view synthesis using multiplane images. ACM Trans. Graph. 37 4 (2018) 65.","DOI":"10.1145\/3197517.3201323"},{"key":"e_1_3_3_2_93_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730623","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:59:57Z","timestamp":1774018797000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730623"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":92,"alternative-id":["10.1145\/3721238.3730623","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730623","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}