{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T03:15:14Z","timestamp":1774667714689,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","funder":[{"name":"Young Elite Scientists Sponsorship Program by the Chinese Association for Science and Technology (CAST)"},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Huawei-BUAA Joint Lab","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62461160331, 62132001"],"award-info":[{"award-number":["62461160331, 62132001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754502","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:56:43Z","timestamp":1761371803000},"page":"9237-9246","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Multi-Object Sketch Animation with Grouping and Motion Trajectory Priors"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0583-7098","authenticated-orcid":false,"given":"Guotao","family":"Liang","sequence":"first","affiliation":[{"name":"School of Software, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2044-8314","authenticated-orcid":false,"given":"Juncheng","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7961-4499","authenticated-orcid":false,"given":"Ximing","family":"Xing","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3516-0111","authenticated-orcid":false,"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China and Qingdao Research Institute, Beihang University, Qingdao, Shandong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0538-7940","authenticated-orcid":false,"given":"Qian","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China and Qingdao Research Institute, Beihang University, Qingdao, Shandong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02644"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2461912.2461964"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417840"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_21"},{"key":"e_1_3_2_1_5_1","unstructured":"Haoxin Chen Menghan Xia Yingqing He Yong Zhang Xiaodong Cun Shaoshu Yang Jinbo Xing Yaofang Liu Qifeng Chen Xintao Wang et al. 2023. Videocrafter1: Open diffusion models for high-quality video generation. arXiv preprint arXiv:2310.19512 (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Sketch-pix2seq: a model to generate sketches of multiple categories. arXiv preprint arXiv:1709.04121","author":"Chen Yajing","year":"2017","unstructured":"Yajing Chen, Shikui Tu, Yuqi Yi, and Lei Xu. 2017. Sketch-pix2seq: a model to generate sketches of multiple categories. arXiv preprint arXiv:1709.04121 (2017)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"James Davis Maneesh Agrawala Erika Chuang Zoran Popovi\u0107 and David Salesin. 2006. A sketching interface for articulated figure animation. In Acm siggraph 2006 courses. 15-es.","DOI":"10.1145\/1185657.1185776"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1038\/s44159-023-00212-w"},{"key":"e_1_3_2_1_9_1","volume-title":"Daniel LK Yamins, and Nicholas B Turk-Browne","author":"Fan Judith E","year":"2018","unstructured":"Judith E Fan, Daniel LK Yamins, and Nicholas B Turk-Browne. 2018. Common object representations for visual production and recognition. Cognitive science, Vol. 42, 8 (2018), 2670-2698."},{"key":"e_1_3_2_1_10_1","first-page":"5207","article-title":"Clipdraw: Exploring text-to-drawing synthesis through language-image encoders","volume":"35","author":"Frans Kevin","year":"2022","unstructured":"Kevin Frans, Lisa Soros, and Olaf Witkowski. 2022. Clipdraw: Exploring text-to-drawing synthesis through language-image encoders. Advances in Neural Information Processing Systems, Vol. 35 (2022), 5207-5218.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00414"},{"key":"e_1_3_2_1_12_1","volume-title":"A neural representation of sketch drawings. arXiv preprint arXiv:1704.03477","author":"Ha David","year":"2017","unstructured":"David Ha and Douglas Eck. 2017. A neural representation of sketch drawings. arXiv preprint arXiv:1704.03477 (2017)."},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Hy6GHpkCW","author":"Ha David","year":"2018","unstructured":"David Ha and Douglas Eck. 2018. A Neural Representation of Sketch Drawings. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Hy6GHpkCW"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1177\/0301006620908207"},{"key":"e_1_3_2_1_15_1","volume-title":"VectorPainter: Advanced Stylized Vector Graphics Synthesis Using Stroke-Style Priors. In 2025 IEEE International Conference on Multimedia and Expo (ICME). IEEE, 1-6.","author":"Hu Juncheng","year":"2025","unstructured":"Juncheng Hu, Ximing Xing, Jing Zhang, and Qian Yu. 2025. VectorPainter: Advanced Stylized Vector Graphics Synthesis Using Stroke-Style Priors. In 2025 IEEE International Conference on Multimedia and Expo (ICME). IEEE, 1-6."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8153-8163","author":"Hu Li","year":"2024","unstructured":"Li Hu. 2024. Animate anyone: Consistent and controllable image-to-video synthesis for character animation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8153-8163."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093440"},{"key":"e_1_3_2_1_18_1","volume-title":"s c. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. s c. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_19_1","volume-title":"Artisketch: A system for articulated sketch modeling. In Computer Graphics Forum","author":"Levi Zohar","year":"2013","unstructured":"Zohar Levi and Craig Gotsman. 2013. Artisketch: A system for articulated sketch modeling. In Computer Graphics Forum, Vol. 32. Wiley Online Library, 235-244."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00154"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417763"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0963-9"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00679"},{"key":"e_1_3_2_1_24_1","volume-title":"Learning to draw: Emergent communication through sketching. Advances in neural information processing systems","author":"Mihai Daniela","year":"2021","unstructured":"Daniela Mihai and Jonathon Hare. 2021. Learning to draw: Emergent communication through sketching. Advances in neural information processing systems, Vol. 34 (2021), 7153-7166."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459833"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_1"},{"key":"e_1_3_2_1_28_1","unstructured":"OpenAI. 2023. Introducing ChatGPT. https:\/\/openai.com\/index\/chatgpt\/."},{"key":"e_1_3_2_1_29_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485895.2485916"},{"key":"e_1_3_2_1_31_1","volume-title":"Sketch-based skeleton-driven 2D animation and motion capture","author":"Pan Junjun","unstructured":"Junjun Pan and Jian J Zhang. 2011. Sketch-based skeleton-driven 2D animation and motion capture. Springer."},{"key":"e_1_3_2_1_32_1","volume-title":"The Eleventh International Conference on Learning Representations (ICLR).","author":"Poole Ben","year":"2023","unstructured":"Ben Poole, Ajay Jain, Jonathan T. Barron, and Ben Mildenhall. 2023. DreamFusion: Text-to-3D using 2D Diffusion. In The Eleventh International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_33_1","unstructured":"Zhiyu Qu Tao Xiang and Yi-Zhe Song. 2023. SketchDreamer: Interactive Text-Augmented Creative Sketch Ideation. In BMVC."},{"key":"e_1_3_2_1_34_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_35_1","volume-title":"Enhancing Sketch Animation: Text-to-Video Diffusion Models with Temporal Consistency and Rigidity Constraints. arXiv preprint arXiv:2411.19381","author":"Rai Gaurav","year":"2024","unstructured":"Gaurav Rai and Ojaswa Sharma. 2024. Enhancing Sketch Animation: Text-to-Video Diffusion Models with Temporal Consistency and Rigidity Constraints. arXiv preprint arXiv:2411.19381 (2024)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01416"},{"key":"e_1_3_2_1_37_1","volume-title":"Denoising Diffusion Implicit Models. In International Conference on Learning Representations (ICLR).","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021. Denoising Diffusion Implicit Models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00090"},{"key":"e_1_3_2_1_39_1","first-page":"16083","article-title":"Any-to-any generation via composable diffusion","volume":"36","author":"Tang Zineng","year":"2023","unstructured":"Zineng Tang, Ziyi Yang, Chenguang Zhu, Michael Zeng, and Mohit Bansal. 2023. Any-to-any generation via composable diffusion. Advances in Neural Information Processing Systems, Vol. 36 (2023), 16083-16099.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_40_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00383"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530068"},{"key":"e_1_3_2_1_43_1","volume-title":"Kristine Zheng, Alex Zhao, Judith E Fan, and Antonio Torralba.","author":"Vinker Yael","year":"2024","unstructured":"Yael Vinker, Tamar Rott Shaham, Kristine Zheng, Alex Zhao, Judith E Fan, and Antonio Torralba. 2024. SketchAgent: Language-Driven Sequential Sketch Generation. arXiv preprint arXiv:2411.17673 (2024)."},{"key":"e_1_3_2_1_44_1","volume-title":"Modelscope text-to-video technical report. arXiv preprint arXiv:2308.06571","author":"Wang Jiuniu","year":"2023","unstructured":"Jiuniu Wang, Hangjie Yuan, Dayou Chen, Yingya Zhang, Xiang Wang, and Shiwei Zhang. 2023a. Modelscope text-to-video technical report. arXiv preprint arXiv:2308.06571 (2023)."},{"key":"e_1_3_2_1_45_1","first-page":"7594","article-title":"Videocomposer: Compositional video synthesis with motion controllability","volume":"36","author":"Wang Xiang","year":"2023","unstructured":"Xiang Wang, Hangjie Yuan, Shiwei Zhang, Dayou Chen, Jiuniu Wang, Yingya Zhang, Yujun Shen, Deli Zhao, and Jingren Zhou. 2023b. Videocomposer: Compositional video synthesis with motion controllability. Advances in Neural Information Processing Systems, Vol. 36 (2023), 7594-7611.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3478513.3480488","article-title":"Deepvecfont: synthesizing high-quality vector fonts via dual-modality learning","volume":"40","author":"Wang Yizhi","year":"2021","unstructured":"Yizhi Wang and Zhouhui Lian. 2021. Deepvecfont: synthesizing high-quality vector fonts via dual-modality learning. ACM Transactions on Graphics (TOG), Vol. 40, 6 (2021), 1-15.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.164"},{"key":"e_1_3_2_1_48_1","volume-title":"European Conference on Computer Vision. Springer, 399-417","author":"Xing Jinbo","year":"2024","unstructured":"Jinbo Xing, Menghan Xia, Yong Zhang, Haoxin Chen, Wangbo Yu, Hanyuan Liu, Gongye Liu, Xintao Wang, Ying Shan, and Tien-Tsin Wong. 2024c. Dynamicrafter: Animating open-domain images with video diffusion priors. In European Conference on Computer Vision. Springer, 399-417."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01815"},{"key":"e_1_3_2_1_50_1","volume-title":"SVGFusion: Scalable Text-to-SVG Generation via Vector Space Diffusion. arXiv preprint arXiv:2412.10437","author":"Xing Ximing","year":"2024","unstructured":"Ximing Xing, Juncheng Hu, Jing Zhang, Dong Xu, and Qian Yu. 2024a. SVGFusion: Scalable Text-to-SVG Generation via Vector Space Diffusion. arXiv preprint arXiv:2412.10437 (2024)."},{"key":"e_1_3_2_1_51_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Xing Ximing","year":"2024","unstructured":"Ximing Xing, Chuang Wang, Haitao Zhou, Jing Zhang, Qian Yu, and Dong Xu. 2024b. Diffsketcher: Text guided vector sketch synthesis through latent diffusion models. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_52_1","volume-title":"SVGDreamer: Advancing Editability and Diversity in Text-Guided SVG Generation","author":"Xing Ximing","year":"2025","unstructured":"Ximing Xing, Qian Yu, Chuang Wang, Haitao Zhou, Jing Zhang, and Dong Xu. 2025b. SVGDreamer: Advancing Editability and Diversity in Text-Guided SVG Generation. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00435"},{"key":"e_1_3_2_1_54_1","volume-title":"Deep learning for free-hand sketch: A survey","author":"Xu Peng","year":"2022","unstructured":"Peng Xu, Timothy M Hospedales, Qiyue Yin, Yi-Zhe Song, Tao Xiang, and Liang Wang. 2022. Deep learning for free-hand sketch: A survey. IEEE transactions on pattern analysis and machine intelligence, Vol. 45, 1 (2022), 285-312."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP63160.2024.10849934"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.93"},{"key":"e_1_3_2_1_57_1","volume-title":"Sketch-a-net: A deep neural network that beats humans. International journal of computer vision","author":"Yu Qian","year":"2017","unstructured":"Qian Yu, Yongxin Yang, Feng Liu, Yi-Zhe Song, Tao Xiang, and Timothy M Hospedales. 2017. Sketch-a-net: A deep neural network that beats humans. International journal of computer vision, Vol. 122 (2017), 411-425."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658204"},{"key":"e_1_3_2_1_59_1","volume-title":"I2vgen-xl: High-quality image-to-video synthesis via cascaded diffusion models. arXiv preprint arXiv:2311.04145","author":"Zhang Shiwei","year":"2023","unstructured":"Shiwei Zhang, Jiayu Wang, Yingya Zhang, Kang Zhao, Hangjie Yuan, Zhiwu Qin, Xiang Wang, Deli Zhao, and Jingren Zhou. 2023. I2vgen-xl: High-quality image-to-video synthesis via cascaded diffusion models. arXiv preprint arXiv:2311.04145 (2023)."},{"key":"e_1_3_2_1_60_1","volume-title":"Computer Graphics Forum","author":"Zheng Yudian","unstructured":"Yudian Zheng, Xiaodong Cun, Menghan Xia, and Chi-Man Pun. 2024. Sketch Video Synthesis. In Computer Graphics Forum, Vol. 43. Wiley Online Library, e15044."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754502","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:32Z","timestamp":1765339472000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754502"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":60,"alternative-id":["10.1145\/3746027.3754502","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754502","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}