{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T03:29:58Z","timestamp":1777865398916,"version":"3.51.4"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"NSFC","doi-asserted-by":"publisher","award":["62172420"],"award-info":[{"award-number":["62172420"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L254039"],"award-info":[{"award-number":["L254039"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.01073","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"11537-11546","source":"Crossref","is-referenced-by-count":0,"title":["Multi-Object Sketch Animation by Scene Decomposition and Motion Planning"],"prefix":"10.1109","author":[{"given":"Jingyu","family":"Liu","sequence":"first","affiliation":[{"name":"Renmin University of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zijie","family":"Xin","sequence":"additional","affiliation":[{"name":"Renmin University of 
China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuhan","family":"Fu","sequence":"additional","affiliation":[{"name":"Renmin University of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruixiang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Renmin University of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bangxiang","family":"Lan","sequence":"additional","affiliation":[{"name":"Renmin University of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xirong","family":"Li","sequence":"additional","affiliation":[{"name":"Renmin University of China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02644"},{"key":"ref2","article-title":"Videocrafter1: Open diffusion models for high-quality video generation","author":"Chen","year":"2023","journal-title":"arXiv preprint"},{"key":"ref3","first-page":"413","article-title":"Ksketch: a \u2018kinetic\u2019 sketch pad for novice animators","author":"Richard","year":"2008","journal-title":"CHI"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201326"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00730"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00414"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02012"},{"key":"ref8","article-title":"A neural representation of sketch drawings","author":"Ha","year":"2018","journal-title":"ICLR"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00779"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.52202\/075280-3443"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02060"},{"key":"ref12","article-title":"Vbench++: Comprehensive and versatile benchmark suite for video generative 
models","author":"Huang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref13","first-page":"351","article-title":"Draco: bringing life to illustrations with kinetic textures","author":"Habib Kazi","year":"2014","journal-title":"CHI"},{"key":"ref14","article-title":"Llm-grounded video diffusion models","author":"Lian","year":"2024","journal-title":"ICLR"},{"key":"ref15","article-title":"Videodirectorgpt: Consistent multi-scene video generation via llm-guided planning","author":"Lin","year":"2023","journal-title":"arXiv preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110131"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00150"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"ref20","article-title":"Dreamfusion: Text-to-3d using 2d diffusion","author":"Poole","year":"2022","journal-title":"arXiv preprint"},{"key":"ref21","article-title":"Diffusiongpt: Llm-driven text-to-image generation system","author":"Qin","year":"2024","journal-title":"arXiv preprint"},{"key":"ref22","first-page":"643","article-title":"Layoutllm-t2i: Eliciting layout guidance from llm for text-to-image generation","author":"Qu","year":"2023","journal-title":"ACM MM"},{"key":"ref23","article-title":"Enhancing sketch animation: Text-to-video diffusion models with temporal consistency and rigidity constraints","author":"Rai","year":"2024","journal-title":"arXiv preprint"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.15176"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3592788"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/MCG.2010.106"},{"key":"ref27","article-title":"Denoising diffusion implicit 
models","author":"Song","year":"2021","journal-title":"ICLR"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174236"},{"key":"ref29","article-title":"Motionzero: Exploiting motion priors for zero-shot text-to-video generation","author":"Su","year":"2023","journal-title":"arXiv preprint"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.00787"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415892"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0707"},{"key":"ref33","article-title":"Videotetris: Towards compositional text-to-video generation","author":"Tian","year":"2024","journal-title":"arXiv preprint"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530068"},{"key":"ref35","article-title":"Modelscope text-to-video technical report","author":"Wang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0334"},{"key":"ref37","first-page":"1","article-title":"Real-time interaction with animated human figures in chinese ancient paintings","author":"Wei","year":"2024","journal-title":"ICMEW"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72952-2_23"},{"key":"ref39","article-title":"Comp4d: Llm-guided compositional 4d scene generation","author":"Xu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73254-6_8"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP63160.2024.10849934"},{"key":"ref42","article-title":"Cogvideox: Text-to-video diffusion models with an expert transformer","author":"Yang","year":"2024","journal-title":"arXiv 
preprint"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/APWCS.2010.27"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1080\/10447318.2024.2301857"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3592413"},{"key":"ref46","article-title":"Trans4d: Realistic geometry-aware transition for compositional text-to-4d synthesis","volume-title":"arXiv preprint","author":"Zeng","year":"2024"},{"key":"ref47","article-title":"I2vgen-xl: High-quality image-to-video synthesis via cascaded diffusion models","volume-title":"arXiv preprint","author":"Zhang","year":"2023"},{"key":"ref48","article-title":"Gala3d: Towards text-to-3d complex scene generation via layout-guided generative gaussian splatting","author":"Zhou","year":"2024","journal-title":"ICML"},{"key":"ref49","article-title":"Compositional 3d-aware video generation with llm director","author":"Zhu","year":"2024","journal-title":"NeurIPS"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11443492.pdf?arnumber=11443492","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T06:26:59Z","timestamp":1777530419000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11443492\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.01073","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}