{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T03:33:07Z","timestamp":1768534387463,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","funder":[{"name":"Shanghai Municipal Science and Technology Major Project","award":["2021SHZDZX0103"],"award-info":[{"award-number":["2021SHZDZX0103"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754873","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:55:00Z","timestamp":1761375300000},"page":"7424-7433","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["UMSD:High Realism Motion Style Transfer via Unified Mamba-based Diffusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9800-1253","authenticated-orcid":false,"given":"Ziyun","family":"Qian","sequence":"first","affiliation":[{"name":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7522-9301","authenticated-orcid":false,"given":"Zeyu","family":"Xiao","sequence":"additional","affiliation":[{"name":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9209-7804","authenticated-orcid":false,"given":"Xingliang","family":"Jin","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1829-5671","authenticated-orcid":false,"given":"Dingkang","family":"Yang","sequence":"additional","affiliation":[{"name":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University&amp;ByteDance Inc., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6244-6081","authenticated-orcid":false,"given":"Mingcheng","family":"Li","sequence":"additional","affiliation":[{"name":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2981-9813","authenticated-orcid":false,"given":"Zhenyi","family":"Wu","sequence":"additional","affiliation":[{"name":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5792-6895","authenticated-orcid":false,"given":"Dongliang","family":"Kou","sequence":"additional","affiliation":[{"name":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1374-7969","authenticated-orcid":false,"given":"Peng","family":"Zhai","sequence":"additional","affiliation":[{"name":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0467-4347","authenticated-orcid":false,"given":"Lihua","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392469"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592458"},{"key":"e_1_3_2_1_3_1","first-page":"222","article-title":"Emotion from motion","volume":"96","author":"Amaya Kenji","year":"1996","unstructured":"Kenji Amaya, Armin Bruderlin, and Tom Calvert. 1996. Emotion from motion. In Graphics Interface, Vol. 96. 222-229.","journal-title":"Graphics Interface"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02009"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00698"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01726"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01324"},{"key":"e_1_3_2_1_8_1","volume-title":"Towards More Accurate Fake Detection on Images Generated from Advanced Generative and Neural Rendering Models. arXiv preprint arXiv:2411.08642","author":"Dong Chengdong","year":"2024","unstructured":"Chengdong Dong, Vijayakumar Bhagavatula, Zhenyu Zhou, and Ajay Kumar. 2024. Towards More Accurate Fake Detection on Images Generated from Advanced Generative and Neural Rendering Models. arXiv preprint arXiv:2411.08642 (2024)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2013.09.001"},{"key":"e_1_3_2_1_10_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Fan Ying","year":"2024","unstructured":"Ying Fan, Olivia Watkins, Yuqing Du, Hao Liu, Moonkyung Ryu, Craig Boutilier, Pieter Abbeel, Mohammad Ghavamzadeh, Kangwook Lee, and Kimin Lee. 2024. Reinforcement learning for fine-tuning text-to-image diffusion models. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"Advances in Neural Information Processing Systems","volume":"27","author":"Goodfellow Ian","year":"2014","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets. Advances in Neural Information Processing Systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_12_1","volume-title":"Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752","author":"Gu Albert","year":"2023","unstructured":"Albert Gu and Tri Dao. 2023. Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752 (2023)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.27973"},{"key":"e_1_3_2_1_14_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in Neural Information Processing Systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_15_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, Vol. 33 (2020), 6840-6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3516429"},{"key":"e_1_3_2_1_17_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Kang Bingyi","year":"2024","unstructured":"Bingyi Kang, Xiao Ma, Chao Du, Tianyu Pang, and Shuicheng Yan. 2024. Efficient diffusion policies for offline reinforcement learning. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00168"},{"key":"e_1_3_2_1_19_1","volume-title":"Jiashi Feng, and Zilong Huang.","author":"Lei Weixian","year":"2025","unstructured":"Weixian Lei, Jiacong Wang, Haochen Wang, Xiangtai Li, Jun Hao Liew, Jiashi Feng, and Zilong Huang. 2025. The scalability of simplicity: Empirical analysis of vision-language learning with a single transformer. arXiv preprint arXiv:2504.10462 (2025)."},{"key":"e_1_3_2_1_20_1","volume-title":"itransformer: Inverted transformers are effective for time series forecasting. arXiv preprint arXiv:2310.06625","author":"Liu Yong","year":"2023","unstructured":"Yong Liu, Tengge Hu, Haoran Zhang, Haixu Wu, Shiyu Wang, Lintao Ma, and Mingsheng Long. 2023. itransformer: Inverted transformers are effective for time series forecasting. arXiv preprint arXiv:2310.06625 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the ACM SIGGRAPH\/Eurographics Symposium on Computer Animation. 21-30","author":"Ma Wanli","year":"2010","unstructured":"Wanli Ma, Shihong Xia, Jessica K Hodgins, Xiao Yang, Chunpeng Li, and Zhaoqi Wang. 2010. Modeling style and variation in human motion. In Proceedings of the ACM SIGGRAPH\/Eurographics Symposium on Computer Animation. 21-30."},{"key":"e_1_3_2_1_23_1","volume-title":"International Conference on Learning Representations. 1-36","author":"Miko\u0142aj Bi'nkowski","year":"2018","unstructured":"Bi'nkowski Miko\u0142aj, J Sutherland Dougal, Arbel Michael, and Gretton Arthur. 2018. Demystifying mmd gans. In International Conference on Learning Representations. 1-36."},{"key":"e_1_3_2_1_24_1","volume-title":"Generative Human Motion Stylization in Latent Space. In International Conference on Learning Representations.","author":"Mu Yuxuan","year":"2023","unstructured":"Yuxuan Mu, Xinxin Zuo, Peng Dai, Youliang Yan, Juwei Lu, Li Cheng, et al., 2023. Generative Human Motion Stylization in Latent Space. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3480145"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01333"},{"key":"e_1_3_2_1_27_1","volume-title":"Single Motion Diffusion. In International Conference on Learning Representations.","author":"Raab Sigal","year":"2024","unstructured":"Sigal Raab, Inbal Leibovitch, Guy Tevet, Moab Arar, Amit Haim Bermano, and Daniel Cohen-Or. 2024. Single Motion Diffusion. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_28_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_1_29_1","volume-title":"Scaling Properties of Diffusion Models for Perceptual Tasks. arXiv preprint arXiv:2411.08034","author":"Ravishankar Rahul","year":"2024","unstructured":"Rahul Ravishankar, Zeeshan Patel, Jathushan Rajasegaran, and Jitendra Malik. 2024. Scaling Properties of Diffusion Models for Perceptual Tasks. arXiv preprint arXiv:2411.08034 (2024)."},{"key":"e_1_3_2_1_30_1","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, Thomas Brox, Philipp Fischer, and Philipp Fischer. 2015. U-net: Convolutional networks for biomedical image segmentation. In Medical Image Computing and Computer-Assisted Intervention. 234-241.","journal-title":"Medical Image Computing and Computer-Assisted Intervention."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3407659"},{"key":"e_1_3_2_1_32_1","volume-title":"FineStyle: Semantic-Aware Fine-Grained Motion Style Transfer with Dual Interactive-Flow Fusion","author":"Song Wenfeng","year":"2023","unstructured":"Wenfeng Song, Xingliang Jin, Shuai Li, Chenglizhao Chen, Aimin Hao, and Xia Hou. 2023. FineStyle: Semantic-Aware Fine-Grained Motion Style Transfer with Dual Interactive-Flow Fusion. IEEE Transactions on Visualization and Computer Graphics (2023)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00084"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01613"},{"key":"e_1_3_2_1_35_1","volume-title":"Human motion diffusion model. arXiv preprint arXiv:2209.14916","author":"Tevet Guy","year":"2022","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yonatan Shafir, Daniel Cohen-Or, and Amit H Bermano. 2022. Human motion diffusion model. arXiv preprint arXiv:2209.14916 (2022)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/218380.218419"},{"key":"e_1_3_2_1_38_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in Neural Information Processing Systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_39_1","volume-title":"VGR: Visual Grounded Reasoning. arXiv preprint arXiv:2506.11991","author":"Wang Jiacong","year":"2025","unstructured":"Jiacong Wang, Zijiang Kang, Haochen Wang, Haiyong Jiang, Jiawen Li, Bohong Wu, Ya Wang, Jiao Ran, Xiao Liang, Chao Feng, et al., 2025. VGR: Visual Grounded Reasoning. arXiv preprint arXiv:2506.11991 (2025)."},{"key":"e_1_3_2_1_40_1","volume-title":"World to code: Multi-modal data generation via self-instructed compositional captioning and filtering. arXiv preprint arXiv:2409.20424","author":"Wang Jiacong","year":"2024","unstructured":"Jiacong Wang, Bohong Wu, Haiyong Jiang, Xun Zhou, Xin Xiao, Haoyuan Guo, and Jun Xiao. 2024. World to code: Multi-modal data generation via self-instructed compositional captioning and filtering. arXiv preprint arXiv:2409.20424 (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766999"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00829"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/505"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Jiaxu Zhang Xin Chen Gang Yu and Zhigang Tu. 2024. Generative Motion Stylization of Cross-structure Characters within Canonical Motion Space. In ACM Multimedia.","DOI":"10.1145\/3664647.3680864"},{"key":"e_1_3_2_1_45_1","volume-title":"Motion Mamba: Efficient and Long Sequence Motion Generation. In European Conference on Computer Vision. Springer, 265-282","author":"Zhang Zeyu","year":"2025","unstructured":"Zeyu Zhang, Akide Liu, Ian Reid, Richard Hartley, Bohan Zhuang, and Hao Tang. 2025. Motion Mamba: Efficient and Long Sequence Motion Generation. In European Conference on Computer Vision. Springer, 265-282."},{"key":"e_1_3_2_1_46_1","volume-title":"SMooDi: Stylized Motion Diffusion Model. arXiv preprint arXiv:2407.12783","author":"Zhong Lei","year":"2024","unstructured":"Lei Zhong, Yiming Xie, Varun Jampani, Deqing Sun, and Huaizu Jiang. 2024. SMooDi: Stylized Motion Diffusion Model. arXiv preprint arXiv:2407.12783 (2024)."},{"key":"e_1_3_2_1_47_1","volume-title":"Motion Control for Enhanced Complex Action Video Generation. arXiv preprint arXiv:2411.08328","author":"Zhou Qiang","year":"2024","unstructured":"Qiang Zhou, Shaofeng Zhang, Nianzu Yang, Ye Qian, and Hao Li. 2024. Motion Control for Enhanced Complex Action Video Generation. arXiv preprint arXiv:2411.08328 (2024)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754873","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:01:14Z","timestamp":1765339274000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754873"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":47,"alternative-id":["10.1145\/3746027.3754873","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754873","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}