{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,17]],"date-time":"2026-07-17T06:09:26Z","timestamp":1784268566513,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62441231,62293542,62472065,U23B2010"],"award-info":[{"award-number":["62441231,62293542,62472065,U23B2010"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"CUHK Direct Grants (RCFUS)","award":["4055189"],"award-info":[{"award-number":["4055189"]}]},{"name":"RGC Early Career Scheme (ECS)","award":["24209224"],"award-info":[{"award-number":["24209224"]}]},{"name":"Liao Ning Province Science and Technology Plan","award":["2023JH26,10200016"],"award-info":[{"award-number":["2023JH26,10200016"]}]},{"name":"Dalian City Science and Technology Innovation Fund","award":["2023JJ11CG001"],"award-info":[{"award-number":["2023JJ11CG001"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["DUT22ZD210"],"award-info":[{"award-number":["DUT22ZD210"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730755","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:42:43Z","timestamp":1753260163000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["CineMaster: A 3D-Aware and Controllable Framework for Cinematic Text-to-Video Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6908-5485","authenticated-orcid":false,"given":"Qinghe","family":"Wang","sequence":"first","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3716-8914","authenticated-orcid":false,"given":"Yawen","family":"Luo","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3696-4442","authenticated-orcid":false,"given":"Xiaoyu","family":"Shi","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3168-3505","authenticated-orcid":false,"given":"Xu","family":"Jia","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6668-9758","authenticated-orcid":false,"given":"Huchuan","family":"Lu","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5031-6618","authenticated-orcid":false,"given":"Tianfan","family":"Xue","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6585-8604","authenticated-orcid":false,"given":"Xintao","family":"Wang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7225-565X","authenticated-orcid":false,"given":"Pengfei","family":"Wan","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5475-2728","authenticated-orcid":false,"given":"Di","family":"Zhang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3636-3618","authenticated-orcid":false,"given":"Kun","family":"Gai","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"crossref","unstructured":"Sherwin Bahmani Ivan Skorokhodov Guocheng Qian Aliaksandr Siarohin Willi Menapace Andrea Tagliasacchi David\u00a0B Lindell and Sergey Tulyakov. 2024. AC3D: Analyzing and Improving 3D Camera Control in Video Diffusion Transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.18673 (2024).","DOI":"10.1109\/CVPR52734.2025.02130"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657525"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Weikang Bian Zhaoyang Huang Xiaoyu Shi Yijin Li Fu-Yun Wang and Hongsheng Li. 2025. GS-DiT: Advancing Video Generation with Pseudo 4D Gaussian Fields through Efficient Dense 3D Point Tracking. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.02690 (2025).","DOI":"10.1109\/CVPR52734.2025.02023"},{"key":"e_1_3_3_2_5_1","unstructured":"Andreas Blattmann Tim Dockhorn Sumith Kulal Daniel Mendelevitch Maciej Kilian Dominik Lorenz Yam Levi Zion English Vikram Voleti Adam Letts et\u00a0al. 2023. Stable video diffusion: Scaling latent video diffusion models to large datasets. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.15127 (2023)."},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00727"},{"key":"e_1_3_3_2_7_1","unstructured":"Haoxin Chen Menghan Xia Yingqing He Yong Zhang Xiaodong Cun Shaoshu Yang Jinbo Xing Yaofang Liu Qifeng Chen Xintao Wang et\u00a0al. 2023b. Videocrafter1: Open diffusion models for high-quality video generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.19512 (2023)."},{"key":"e_1_3_3_2_8_1","unstructured":"Weifeng Chen Yatai Ji Jie Wu Hefeng Wu Pan Xie Jiashi Li Xin Xia Xuefeng Xiao and Liang Lin. 2023a. Control-a-video: Controllable text-to-video generation with diffusion models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.13840 (2023)."},{"key":"e_1_3_3_2_9_1","unstructured":"Yingjie Chen Yifang Men Yuan Yao Miaomiao Cui and Liefeng Bo. 2025. Perception-as-Control: Fine-grained Controllable Image Animation with 3D-aware Motion Representation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.05020 (2025)."},{"key":"e_1_3_3_2_10_1","volume-title":"Blender - a 3D modelling and rendering package","author":"Community Blender\u00a0Online","year":"2018","unstructured":"Blender\u00a0Online Community. 2018. Blender - a 3D modelling and rendering package. Blender Foundation, Stichting Blender Foundation, Amsterdam. http:\/\/www.blender.org"},{"key":"e_1_3_3_2_11_1","unstructured":"Mostafa Dehghani Basil Mustafa Josip Djolonga Jonathan Heek Matthias Minderer Mathilde Caron Andreas Steiner Joan Puigcerver Robert Geirhos Ibrahim\u00a0M Alabdulmohsin et\u00a0al. 2024. Patch n\u2019pack: Navit a vision transformer for any aspect ratio and resolution. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_12_1","volume-title":"Forty-first International Conference on Machine Learning","author":"Esser Patrick","year":"2024","unstructured":"Patrick Esser, Sumith Kulal, Andreas Blattmann, Rahim Entezari, Jonas M\u00fcller, Harry Saini, Yam Levi, Dominik Lorenz, Axel Sauer, Frederic Boesel, et\u00a0al. 2024. Scaling rectified flow transformers for high-resolution image synthesis. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00641"},{"key":"e_1_3_3_2_14_1","unstructured":"Xiao Fu Xian Liu Xintao Wang Sida Peng Menghan Xia Xiaoyu Shi Ziyang Yuan Pengfei Wan Di Zhang and Dahua Lin. 2024. 3DTrajMaster: Mastering 3D Trajectory for Multi-Entity Motion in Video Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.07759 (2024)."},{"key":"e_1_3_3_2_15_1","unstructured":"Zekai Gu Rui Yan Jiahao Lu Peng Li Zhiyang Dou Chenyang Si Zhen Dong Qifeng Liu Cheng Lin Ziwei Liu et\u00a0al. 2025. Diffusion as Shader: 3D-aware Video Diffusion for Versatile Video Generation Control. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.03847 (2025)."},{"key":"e_1_3_3_2_16_1","first-page":"330","volume-title":"European Conference on Computer Vision","author":"Guo Yuwei","year":"2024","unstructured":"Yuwei Guo, Ceyuan Yang, Anyi Rao, Maneesh Agrawala, Dahua Lin, and Bo Dai. 2024. Sparsectrl: Adding sparse controls to text-to-video diffusion models. In European Conference on Computer Vision. Springer, 330\u2013348."},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72946-1_19"},{"key":"e_1_3_3_2_18_1","unstructured":"Yuwei Guo Ceyuan Yang Anyi Rao Zhengyang Liang Yaohui Wang Yu Qiao Maneesh Agrawala Dahua Lin and Bo Dai. 2023. Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.04725 (2023)."},{"key":"e_1_3_3_2_19_1","unstructured":"Hao He Yinghao Xu Yuwei Guo Gordon Wetzstein Bo Dai Hongsheng Li and Ceyuan Yang. 2024. CameraCtrl: Enabling Camera Control for Text-to-Video Generation. arxiv:https:\/\/arXiv.org\/abs\/2404.02101\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2404.02101"},{"key":"e_1_3_3_2_20_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems 33 (2020) 6840\u20136851."},{"key":"e_1_3_3_2_21_1","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.12598 (2022)."},{"key":"e_1_3_3_2_22_1","volume-title":"International Conference on Learning Representations","author":"Hu Edward\u00a0J","year":"2022","unstructured":"Edward\u00a0J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_3_2_23_1","first-page":"8153","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Hu Li","year":"2024","unstructured":"Li Hu. 2024. Animate anyone: Consistent and controllable image-to-video synthesis for character animation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8153\u20138163."},{"key":"e_1_3_3_2_24_1","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems","author":"Hu Minghui","year":"2023","unstructured":"Minghui Hu, Jianbin Zheng, Daqing Liu, Chuanxia Zheng, Chaoyue Wang, Dacheng Tao, and Tat-Jen Cham. 2023. Cocktail: Mixing multi-modality control for text-conditional image generation. In Thirty-seventh Conference on Neural Information Processing Systems."},{"key":"e_1_3_3_2_25_1","unstructured":"Lianghua Huang Di Chen Yu Liu Yujun Shen Deli Zhao and Jingren Zhou. 2023. Composer: Creative and controllable image synthesis with composable conditions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.09778 (2023)."},{"key":"e_1_3_3_2_26_1","unstructured":"Diederik\u00a0P Kingma. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1312.6114 (2013)."},{"key":"e_1_3_3_2_27_1","unstructured":"Diederik\u00a0P Kingma. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1412.6980 (2014)."},{"key":"e_1_3_3_2_28_1","unstructured":"Mathis Koroglu Hugo Caselles-Dupr\u00e9 Guillaume\u00a0Jeanneret Sanmiguel and Matthieu Cord. 2024. OnlyFlow: Optical Flow based Motion Conditioning for Video Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.10501 (2024)."},{"key":"e_1_3_3_2_29_1","unstructured":"Han Lin Jaemin Cho Abhay Zala and Mohit Bansal. 2024. Ctrl-Adapter: An Efficient and Versatile Framework for Adapting Diverse Controls to Any Diffusion Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.09967 (2024)."},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_2_31_1","unstructured":"Yaron Lipman Ricky\u00a0TQ Chen Heli Ben-Hamu Maximilian Nickel and Matt Le. 2022. Flow matching for generative modeling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.02747 (2022)."},{"key":"e_1_3_3_2_32_1","unstructured":"Shilong Liu Zhaoyang Zeng Tianhe Ren Feng Li Hao Zhang Jie Yang Chunyuan Li Jianwei Yang Hang Su Jun Zhu et\u00a0al. 2023. Grounding dino: Marrying dino with grounded pre-training for open-set object detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.05499 (2023)."},{"key":"e_1_3_3_2_33_1","unstructured":"Mario Lucic Karol Kurach Marcin Michalski Sylvain Gelly and Olivier Bousquet. 2017. Are gans created equal? a large-scale study. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1711.10337 (2017)."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_3_2_36_1","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter\u00a0J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of machine learning research 21 140 (2020) 1\u201367."},{"key":"e_1_3_3_2_37_1","unstructured":"Nikhila Ravi Valentin Gabeur Yuan-Ting Hu Ronghang Hu Chaitanya Ryali Tengyu Ma Haitham Khedr Roman R\u00e4dle Chloe Rolland Laura Gustafson Eric Mintun Junting Pan Kalyan\u00a0Vasudev Alwala Nicolas Carion Chao-Yuan Wu Ross Girshick Piotr Doll\u00e1r and Christoph Feichtenhofer. 2024. SAM 2: Segment Anything in Images and Videos. arxiv:https:\/\/arXiv.org\/abs\/2408.00714\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2408.00714"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00852"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657497"},{"key":"e_1_3_3_2_41_1","unstructured":"Xincheng Shuai Henghui Ding Zhenyuan Qin Hao Luo Xingjun Ma and Dacheng Tao. 2025. Free-Form Motion Control: A Synthetic Video Generation Dataset with Controllable Camera and Object Motions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.01425 (2025)."},{"key":"e_1_3_3_2_42_1","unstructured":"Jiaming Song Chenlin Meng and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.02502 (2020)."},{"key":"e_1_3_3_2_43_1","first-page":"92","volume-title":"European Conference on Computer Vision","author":"Sun Yanan","year":"2024","unstructured":"Yanan Sun, Yanchen Liu, Yinhao Tang, Wenjie Pei, and Kai Chen. 2024. Anycontrol: create your artwork with versatile control on text-to-image generation. In European Conference on Computer Vision. Springer, 92\u2013109."},{"key":"e_1_3_3_2_44_1","unstructured":"Thomas Unterthiner Sjoerd Van\u00a0Steenkiste Karol Kurach Raphael Marinier Marcin Michalski and Sylvain Gelly. 2018. Towards accurate generative models of video: A new metric & challenges. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1812.01717 (2018)."},{"key":"e_1_3_3_2_45_1","unstructured":"Cong Wang Jiaxi Gu Panwen Hu Haoyu Zhao Yuanfan Guo Jianhua Han Hang Xu and Xiaodan Liang. 2024a. EasyControl: Transfer ControlNet to Video Diffusion for Controllable Generation and Interpolation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.13005 (2024)."},{"key":"e_1_3_3_2_46_1","unstructured":"Xiang Wang Hangjie Yuan Shiwei Zhang Dayou Chen Jiuniu Wang Yingya Zhang Yujun Shen Deli Zhao and Jingren Zhou. 2023. VideoComposer: Compositional Video Synthesis with Motion Controllability. http:\/\/arxiv.org\/abs\/2306.02018 arXiv:https:\/\/arXiv.org\/abs\/2306.02018 [cs]."},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"crossref","unstructured":"Zhouxia Wang Ziyang Yuan Xintao Wang Tianshui Chen Menghan Xia Ping Luo and Ying Shan. 2024b. MotionCtrl: A Unified and Flexible Motion Controller for Video Generation. https:\/\/doi.org\/10.48550\/arXiv.2312.03641 arXiv:https:\/\/arXiv.org\/abs\/2312.03641 [cs].","DOI":"10.1145\/3641519.3657518"},{"key":"e_1_3_3_2_48_1","unstructured":"Jianzong Wu Xiangtai Li Yanhong Zeng Jiangning Zhang Qianyu Zhou Yining Li Yunhai Tong and Kai Chen. 2024. Motionbooth: Motion-aware customized text-to-video generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.17758 (2024)."},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01929"},{"key":"e_1_3_3_2_50_1","unstructured":"Zeqi Xiao Wenqi Ouyang Yifan Zhou Shuai Yang Lei Yang Jianlou Si and Xingang Pan. 2024a. Trajectory Attention for Fine-grained Video Motion Control. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.19324 (2024)."},{"key":"e_1_3_3_2_51_1","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou Chengpeng Li Chengyuan Li Dayiheng Liu Fei Huang Guanting Dong Haoran Wei Huan Lin Jialong Tang Jialin Wang Jian Yang Jianhong Tu Jianwei Zhang Jianxin Ma Jin Xu Jingren Zhou Jinze Bai Jinzheng He Junyang Lin Kai Dang Keming Lu Keqin Chen Kexin Yang Mei Li Mingfeng Xue Na Ni Pei Zhang Peng Wang Ru Peng Rui Men Ruize Gao Runji Lin Shijie Wang Shuai Bai Sinan Tan Tianhang Zhu Tianhao Li Tianyu Liu Wenbin Ge Xiaodong Deng Xiaohuan Zhou Xingzhang Ren Xinyu Zhang Xipin Wei Xuancheng Ren Yang Fan Yang Yao Yichang Zhang Yu Wan Yunfei Chu Yuqiong Liu Zeyu Cui Zhenru Zhang and Zhihao Fan. 2024c. Qwen2 Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.10671 (2024)."},{"key":"e_1_3_3_2_52_1","unstructured":"Lihe Yang Bingyi Kang Zilong Huang Zhen Zhao Xiaogang Xu Jiashi Feng and Hengshuang Zhao. 2024b. Depth Anything V2. arXiv:https:\/\/arXiv.org\/abs\/2406.09414 (2024)."},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657481"},{"key":"e_1_3_3_2_54_1","unstructured":"Shengming Yin Chenfei Wu Jian Liang Jie Shi Houqiang Li Gong Ming and Nan Duan. 2023. DragNUWA: Fine-grained Control in Video Generation by Integrating Text Image and Trajectory. arxiv:https:\/\/arXiv.org\/abs\/2308.08089\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2308.08089"},{"key":"e_1_3_3_2_55_1","unstructured":"Junyi Zhang Charles Herrmann Junhwa Hur Varun Jampani Trevor Darrell Forrester Cole Deqing Sun and Ming-Hsuan Yang. 2024a. MonST3R: A Simple Approach for Estimating Geometry in the Presence of Motion. arXiv preprint arxiv:https:\/\/arXiv.org\/abs\/2410.03825 (2024)."},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"crossref","unstructured":"Lvmin Zhang Anyi Rao and Maneesh Agrawala. 2023. Adding Conditional Control to Text-to-Image Diffusion Models.","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_2_57_1","unstructured":"Zhenghao Zhang Junchao Liao Menghao Li Long Qin and Weizhi Wang. 2024b. Tora: Trajectory-oriented Diffusion Transformer for Video Generation. http:\/\/arxiv.org\/abs\/2407.21705 arXiv:https:\/\/arXiv.org\/abs\/2407.21705 [cs]."},{"key":"e_1_3_3_2_58_1","doi-asserted-by":"crossref","unstructured":"Shihao Zhao Dongdong Chen Yen-Chun Chen Jianmin Bao Shaozhe Hao Lu Yuan and Kwan-Yee\u00a0K Wong. 2023. Uni-controlnet: All-in-one control to text-to-image diffusion models. Advances in Neural Information Processing Systems 36 (2023) 11127\u201311150.","DOI":"10.52202\/075280-0491"},{"key":"e_1_3_3_2_59_1","unstructured":"Guangcong Zheng Teng Li Rui Jiang Yehao Lu Tao Wu and Xi Li. 2024. Cami2v: Camera-controlled image-to-video diffusion model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.15957 (2024)."},{"key":"e_1_3_3_2_60_1","unstructured":"Tinghui Zhou Richard Tucker John Flynn Graham Fyffe and Noah Snavely. 2018. Stereo Magnification: Learning View Synthesis using Multiplane Images. arxiv:https:\/\/arXiv.org\/abs\/1805.09817\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1805.09817"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730755","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:02:15Z","timestamp":1774018935000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730755"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":59,"alternative-id":["10.1145\/3721238.3730755","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730755","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}