{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T20:03:54Z","timestamp":1769630634644,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100003977","name":"Israel Science Foundation","doi-asserted-by":"publisher","award":["624\/25"],"award-info":[{"award-number":["624\/25"]}],"id":[{"id":"10.13039\/501100003977","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,15]]},"DOI":"10.1145\/3757377.3763992","type":"proceedings-article","created":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T16:30:41Z","timestamp":1765211441000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Gaussian See, Gaussian Do: Semantic 3D Motion Transfer from Multiview Video"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6827-9868","authenticated-orcid":false,"given":"Yarin","family":"Bekor","sequence":"first","affiliation":[{"name":"Technion - Israel Institute of Technology, Raanana, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3728-5454","authenticated-orcid":false,"given":"Gal Michael","family":"Harari","sequence":"additional","affiliation":[{"name":"Technion \u2013 Israel Institute of Technology, Givatayim, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6478-1422","authenticated-orcid":false,"given":"Or","family":"Perel","sequence":"additional","affiliation":[{"name":"NVIDIA, Tel Aviv, Israel; University of Toronto, Toronto, Canada and Vector Institute, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6700-7379","authenticated-orcid":false,"given":"Or","family":"Litany","sequence":"additional","affiliation":[{"name":"Technion \u2013 Israel Institute of Technology, Haifa, Israel and NVIDIA, Haifa, Israel"}]}],"member":"320","published-online":{"date-parts":[[2025,12,14]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"crossref","unstructured":"Kfir Aberman Peizhuo Li Dani Lischinski Olga Sorkine-Hornung Daniel Cohen-Or and Baoquan Chen. 2020. Skeleton-Aware Networks for Deep Motion Retargeting. ACM Transactions on Graphics (TOG) 39 4 (2020) 62.","DOI":"10.1145\/3386569.3392462"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"crossref","unstructured":"Sherwin Bahmani Xian Liu Wang Yifan Ivan Skorokhodov Victor Rong Ziwei Liu Xihui Liu Jeong\u00a0Joon Park Sergey Tulyakov Gordon Wetzstein Andrea Tagliasacchi and David\u00a0B. Lindell. 2024a. TC4D: Trajectory-Conditioned Text-to-4D Generation. arXiv (2024).","DOI":"10.1007\/978-3-031-72952-2_4"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Sherwin Bahmani Ivan Skorokhodov Victor Rong Gordon Wetzstein Leonidas Guibas Peter Wonka Sergey Tulyakov Jeong\u00a0Joon Park Andrea Tagliasacchi and David\u00a0B. Lindell. 2024b. 4D-fy: Text-to-4D Generation Using Hybrid Score Distillation Sampling. IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2024).","DOI":"10.1109\/CVPR52733.2024.00764"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"crossref","unstructured":"Ilya Baran and Jovan Popovi\u0107. 2007. Automatic rigging and animation of 3d characters. ACM Transactions on graphics (TOG) 26 3 (2007) 72\u2013es.","DOI":"10.1145\/1276377.1276467"},{"key":"e_1_3_3_2_6_1","unstructured":"Andreas Blattmann Tim Dockhorn Sumith Kulal Daniel Mendelevitch Maciej Kilian Dominik Lorenz Yam Levi Zion English Vikram Voleti Adam Letts et\u00a0al. 2023. Stable video diffusion: Scaling latent video diffusion models to large datasets. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.15127 (2023)."},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01391"},{"key":"e_1_3_3_2_8_1","unstructured":"Zedong Chu Feng Xiong Meiduo Liu Jinzhi Zhang Mingqi Shao Zhaoxu Sun Di Wang and Mu Xu. 2024. HumanRig: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.02317 (2024)."},{"key":"e_1_3_3_2_9_1","unstructured":"Rinon Gal Yuval Alaluf Yuval Atzmon Or Patashnik Amit\u00a0H Bermano Gal Chechik and Daniel Cohen-Or. 2022. An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2208.01618 (2022)."},{"key":"e_1_3_3_2_10_1","unstructured":"Yuwei Guo Ceyuan Yang Anyi Rao Zhengyang Liang Yaohui Wang Yu Qiao Maneesh Agrawala Dahua Lin and Bo Dai. 2024b. AnimateDiff: Animate Your Personalized Text-to-Image Diffusion Models without Specific Tuning. arxiv:https:\/\/arXiv.org\/abs\/2307.04725\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2307.04725"},{"key":"e_1_3_3_2_11_1","unstructured":"Zhiyang Guo Jinxu Xiang Kai Ma Wengang Zhou Houqiang Li and Ran Zhang. 2024a. Make-It-Animatable: An Efficient Framework for Authoring Animation-Ready 3D Characters. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.18197 (2024)."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00404"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","unstructured":"Adobe\u00a0Systems Inc.2024. Mixamo. https:\/\/doi.org\/10.57702\/xbooguyb DOI retrieved: December 2 2024.","DOI":"10.57702\/xbooguyb"},{"key":"e_1_3_3_2_15_1","volume-title":"The Twelfth International Conference on Learning Representations","author":"Jiang Yanqin","year":"2024","unstructured":"Yanqin Jiang, Li Zhang, Jin Gao, Weiming Hu, and Yao Yao. 2024. Consistent4D: Consistent 360\u00b0 Dynamic Object Generation from Monocular Video. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=sPUrdFGepF"},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"crossref","unstructured":"Manuel Kansy Jacek Naruniec Christopher Schroers Markus Gross and Romann\u00a0M Weber. 2024. Reenact Anything: Semantic Video Motion Transfer Using Motion-Textual Inversion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.00458 (2024).","DOI":"10.1145\/3721238.3730668"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"crossref","unstructured":"Bernhard Kerbl Georgios Kopanas Thomas Leimk\u00fchler and George Drettakis. 2023. 3d gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42 4 (2023) 139\u20131.","DOI":"10.1145\/3592433"},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00168"},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"crossref","unstructured":"Peizhuo Li Kfir Aberman Rana Hanocka Libin Liu Olga Sorkine-Hornung and Baoquan Chen. 2021. Learning skeletal articulations with neural blend shapes. ACM Transactions on Graphics (TOG) 40 4 (2021) 1\u201315.","DOI":"10.1145\/3450626.3459852"},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"crossref","unstructured":"Zhiqi Li Yiming Chen and Peidong Liu. 2024. Dreammesh4d: Video-to-4d generation with sparse-controlled gaussian-mesh hybrid representation. Advances in Neural Information Processing Systems 37 (2024) 21377\u201321400.","DOI":"10.52202\/079017-0674"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00819"},{"key":"e_1_3_3_2_22_1","unstructured":"Pengyang Ling Jiazi Bu Pan Zhang Xiaoyi Dong Yuhang Zang Tong Wu Huaian Chen Jiaqi Wang and Yi Jin. 2024a. MotionClone: Training-Free Motion Cloning for Controllable Video Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.05338 (2024)."},{"key":"e_1_3_3_2_23_1","unstructured":"Isabella Liu Zhan Xu Wang Yifan Hao Tan Zexiang Xu Xiaolong Wang Hao Su and Zifan Shi. 2025. RigAnything: Template-Free Autoregressive Rigging for Diverse 3D Assets. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.09615 (2025)."},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00044"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"crossref","unstructured":"Jing Ma and Dongliang Zhang. 2023. TARig: Adaptive template-aware neural rigging for humanoid characters. Computers & Graphics 114 (2023) 158\u2013167.","DOI":"10.1016\/j.cag.2023.05.018"},{"key":"e_1_3_3_2_26_1","unstructured":"Joanna Materzy\u0144ska Josef Sivic Eli Shechtman Antonio Torralba Richard Zhang and Bryan Russell. 2023. Customizing Motion in Text-to-Video Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.04966 (2023)."},{"key":"e_1_3_3_2_27_1","unstructured":"Qiaowei Miao Yawei Luo and Yi Yang. 2024. PLA4D: Pixel-Level Alignments for Text-to-4D Gaussian Splatting. arxiv:https:\/\/arXiv.org\/abs\/2405.19957\u00a0[cs.CV]"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73636-06"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00197"},{"key":"e_1_3_3_2_31_1","unstructured":"Ben Poole Ajay Jain Jonathan\u00a0T. Barron and Ben Mildenhall. 2022. DreamFusion: Text-to-3D using 2D Diffusion. arXiv (2022)."},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687579"},{"key":"e_1_3_3_2_33_1","volume-title":"International Conference on Machine Learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:231591445"},{"key":"e_1_3_3_2_34_1","unstructured":"Ohad Rahamim Ori Malca Dvir Samuel and Gal Chechik. 2024. Bringing Objects to Life: 4D generation from 3D objects. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.20422 (2024)."},{"key":"e_1_3_3_2_35_1","unstructured":"Jiawei Ren Liang Pan Jiaxiang Tang Chi Zhang Ang Cao Gang Zeng and Ziwei Liu. 2023. DreamGaussian4D: Generative 4D Gaussian Splatting. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.17142 (2023)."},{"key":"e_1_3_3_2_36_1","unstructured":"Uriel Singer Shelly Sheynin Adam Polyak Oron Ashual Iurii Makarov Filippos Kokkinos Naman Goyal Andrea Vedaldi Devi Parikh Justin Johnson and Yaniv Taigman. 2023. Text-To-4D Dynamic Scene Generation. arXiv:https:\/\/arXiv.org\/abs\/2301.11280 (2023)."},{"key":"e_1_3_3_2_37_1","first-page":"109","volume-title":"Symposium on Geometry processing","author":"Sorkine Olga","year":"2007","unstructured":"Olga Sorkine and Marc Alexa. 2007. As-rigid-as-possible surface modeling. In Symposium on Geometry processing , Vol.\u00a04. Citeseer, 109\u2013116."},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","unstructured":"Robert\u00a0W. Sumner Johannes Schmid and Mark Pauly. 2007. Embedded deformation for shape manipulation. ACM Trans. Graph. 26 3 (July 2007) 80\u2013es. 10.1145\/1276377.1276478","DOI":"10.1145\/1276377.1276478"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"crossref","unstructured":"Yang-Tian Sun Qian-Cheng Fu Yue-Ren Jiang Zitao Liu Yu-Kun Lai Hongbo Fu and Lin Gao. 2022. Human motion transfer with 3d constraints and detail enhancement. IEEE Transactions on Pattern Analysis and Machine Intelligence 45 4 (2022) 4682\u20134693.","DOI":"10.1109\/TPAMI.2022.3201904"},{"key":"e_1_3_3_2_40_1","unstructured":"Guy Tevet Sigal Raab Brian Gordon Yonatan Shafir Daniel Cohen-Or and Amit\u00a0H Bermano. 2022. Human motion diffusion model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2209.14916 (2022)."},{"key":"e_1_3_3_2_41_1","volume-title":"Proceedings of the 20th International Society for Music Information Retrieval Conference, ISMIR 2019","author":"Tsuchida Shuhei","year":"2019","unstructured":"Shuhei Tsuchida, Satoru Fukayama, Masahiro Hamasaki, and Masataka Goto. 2019. AIST Dance Video Database: Multi-genre, Multi-dancer, and Multi-camera Database for Dance Information Processing. In Proceedings of the 20th International Society for Music Information Retrieval Conference, ISMIR 2019. Delft, Netherlands."},{"key":"e_1_3_3_2_42_1","unstructured":"Lukas Uzolas Elmar Eisemann and Petr Kellnhofer. 2024. MotionDreamer: Zero-Shot 3D Mesh Animation from Video Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.20155 (2024)."},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00901"},{"key":"e_1_3_3_2_44_1","unstructured":"Jiuniu Wang Hangjie Yuan Dayou Chen Yingya Zhang Xiang Wang and Shiwei Zhang. 2023. ModelScope Text-to-Video Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2308.06571\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2308.06571"},{"key":"e_1_3_3_2_45_1","unstructured":"Luozhou Wang Ziyang Mai Guibao Shen Yixun Liang Xin Tao Pengfei Wan Di Zhang Yijun Li and Yingcong Chen. 2024a. Motion Inversion for Video Customization. arxiv:https:\/\/arXiv.org\/abs\/2403.20193\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2403.20193"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72933-1_3"},{"key":"e_1_3_3_2_47_1","first-page":"321","volume-title":"European Conference on Computer Vision","author":"Wang Xinzhou","year":"2024","unstructured":"Xinzhou Wang, Yikai Wang, Junliang Ye, Fuchun Sun, Zhengyi Wang, Ling Wang, Pengkun Liu, Kai Sun, Xintong Wang, Wende Xie, et\u00a0al. 2024b. Animatabledreamer: Text-guided non-rigid 3d model generation and reconstruction with canonical score distillation. In European Conference on Computer Vision. Springer, 321\u2013339."},{"key":"e_1_3_3_2_48_1","first-page":"6537","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Wei Yujie","year":"2024","unstructured":"Yujie Wei, Shiwei Zhang, Zhiwu Qing, Hangjie Yuan, Zhiheng Liu, Yu Liu, Yingya Zhang, Jingren Zhou, and Hongming Shan. 2024. DreamVideo: Composing Your Dream Videos with Customized Subject and Motion. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 6537\u20136549."},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02036"},{"key":"e_1_3_3_2_50_1","unstructured":"Zijie Wu Chaohui Yu Yanqin Jiang Chenjie Cao Wang Fan and Xiang. Bai. 2024b. SC4D: Sparse-Controlled Video-to-4D Generation and Motion Transfer. arxiv:https:\/\/arXiv.org\/abs\/2404.03736 (2024)."},{"key":"e_1_3_3_2_51_1","unstructured":"Zeqi Xiao Yifan Zhou Shuai Yang and Xingang Pan. 2024. Video Diffusion Models are Training-free Motion Interpreter and Controller. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.14864 (2024)."},{"key":"e_1_3_3_2_52_1","unstructured":"Jinbo Xing Menghan Xia Yong Zhang Haoxin Chen Wangbo Yu Hanyuan Liu Xintao Wang Tien-Tsin Wong and Ying Shan. 2023. DynamiCrafter: Animating Open-domain Images with Video Diffusion Priors. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.12190 (2023)."},{"key":"e_1_3_3_2_53_1","unstructured":"Zhan Xu Yang Zhou Evangelos Kalogerakis Chris Landreth and Karan Singh. 2020. Rignet: Neural rigging for articulated characters. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2005.00559 (2020)."},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00809"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"crossref","unstructured":"Yifei Zeng Yanqin Jiang Siyu Zhu Yuanxun Lu Youtian Lin Hao Zhu Weiming Hu Xun Cao and Yao Yao. 2024. STAG4D: Spatial-Temporal Anchored Generative 4D Gaussians. arxiv:https:\/\/arXiv.org\/abs\/2403.14939\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2403.14939","DOI":"10.1007\/978-3-031-72764-1_10"},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680864"},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01332"},{"key":"e_1_3_3_2_58_1","doi-asserted-by":"crossref","unstructured":"Mingyuan Zhang Zhongang Cai Liang Pan Fangzhou Hong Xinying Guo Lei Yang and Ziwei Liu. 2024a. Motiondiffuse: Text-driven human motion generation with diffusion model. IEEE transactions on pattern analysis and machine intelligence 46 6 (2024) 4115\u20134128.","DOI":"10.1109\/TPAMI.2024.3355414"},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_3_2_60_1","unstructured":"Shiwei Zhang Jiayu Wang Yingya Zhang Kang Zhao Hangjie Yuan Zhiwu Qin Xiang Wang Deli Zhao and Jingren Zhou. 2023a. I2VGen-XL: High-Quality Image-to-Video Synthesis via Cascaded Diffusion Models. arxiv:https:\/\/arXiv.org\/abs\/2311.04145\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2311.04145"},{"key":"e_1_3_3_2_61_1","unstructured":"Rui Zhao Yuchao Gu Jay\u00a0Zhangjie Wu David\u00a0Junhao Zhang Jiawei Liu Weijia Wu Jussi Keppo and Mike\u00a0Zheng Shou. 2023a. MotionDirector: Motion Customization of Text-to-Video Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.08465 (2023)."},{"key":"e_1_3_3_2_62_1","unstructured":"Yuyang Zhao Zhiwen Yan Enze Xie Lanqing Hong Zhenguo Li and Gim\u00a0Hee Lee. 2023b. Animate124: Animating one image to 4d dynamic scene. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.14603 (2023)."},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00697"}],"event":{"name":"SA Conference Papers '25: SIGGRAPH Asia 2025 Conference Papers","location":"Hong Kong Hong Kong","acronym":"SA Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the SIGGRAPH Asia 2025 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3757377.3763992","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T03:32:29Z","timestamp":1765251149000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3757377.3763992"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,14]]},"references-count":62,"alternative-id":["10.1145\/3757377.3763992","10.1145\/3757377"],"URL":"https:\/\/doi.org\/10.1145\/3757377.3763992","relation":{},"subject":[],"published":{"date-parts":[[2025,12,14]]},"assertion":[{"value":"2025-12-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}