{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:09:41Z","timestamp":1765339781064,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","funder":[{"name":"Pioneer and Leading Goose R&D Program of Zhejiang","award":["No.2024C01020"],"award-info":[{"award-number":["No.2024C01020"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754748","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:51Z","timestamp":1761377211000},"page":"9326-9335","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Robust and Controllable Text-to-Motion via Masked Autoregressive Diffusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8149-0533","authenticated-orcid":false,"given":"Zongye","family":"Zhang","sequence":"first","affiliation":[{"name":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3813-0427","authenticated-orcid":false,"given":"Bohan","family":"Kong","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5181-6451","authenticated-orcid":false,"given":"Qingjie","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China and Hangzhou Innovation Institute, Beihang University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8001-2703","authenticated-orcid":false,"given":"Yunhong","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Virtual Reality Technology and Systems, Beihang University, Beijing, China and Hangzhou Innovation Institute, Beihang University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2019.00084"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV57658.2022.00053"},{"key":"e_1_3_2_1_3_1","unstructured":"Ling-Hao Chen Wenxun Dai Xuan Ju Shunlin Lu and Lei Zhang. 2024-10-24. MotionCLR: Motion Generation and Training-free Editing via Understanding Attention Mechanisms . arXiv:2410.18977 [cs]"},{"key":"e_1_3_2_1_4_1","first-page":"6989","volume-title":"Proceedings of the 32nd ACM International Conference, on Multimedia","author":"Wenshuo","year":"2024","unstructured":"Wenshuo chen, Hongru Xiao, Erhang Zhang, Lijie Hu, Lei Wang, Mengyuan Liu, and Chen Chen. 2024-10-28. SATO: Stable Text-to-Motion Framework. In Proceedings of the 32nd ACM International Conference, on Multimedia (New York, NY, USA) (MM, '24). Association for Computing Machinery, 6989-6997."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01726"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3677388.3696327"},{"key":"e_1_3_2_1_7_1","first-page":"1","volume-title":"ACM SIGGRAPH, 2024 Conference Papers","author":"Cohan Setareh","unstructured":"Setareh Cohan, Guy Tevet, Daniele Reda, Xue Bin Peng, and Michiel van de Panne. 2024-07-13. Flexible Motion In-betweening, with Diffusion Models. In ACM SIGGRAPH, 2024 Conference Papers (New York, NY, USA) (SIGGRAPH, '24). Association for Computing Machinery, 1-9."},{"key":"e_1_3_2_1_8_1","volume-title":"European Conference on Computer Vision (ECCV). 390-408","author":"Dai Wenxun","year":"2024","unstructured":"Wenxun Dai, Ling-Hao Chen, Jingbo Wang, Jinpeng Liu, Bo Dai, and Yansong Tang. 2024. MotionLCM: Real-Time Controllable Motion Generation via Latent Consistency Model. In European Conference on Computer Vision (ECCV). 390-408."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00186"},{"key":"e_1_3_2_1_10_1","first-page":"5142","article-title":"Generating Diverse and Natural 3D Human Motions from Text","author":"Guo Chuan","year":"2022","unstructured":"Chuan Guo, Shihao Zou, Xinxin Zuo, Sen Wang, Wei Ji, Xingyu Li, and Li Cheng. 2022a. Generating Diverse and Natural 3D Human Motions from Text. In Computer Vision and Pattern Recognition (CVPR). 5142-5151.","journal-title":"Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_34"},{"key":"e_1_3_2_1_12_1","volume-title":"AMD: Autoregressive Motion Diffusion. In AAAI Conference on Artificial Intelligence (AAAI). 2022-2030","author":"Han Bo","year":"2024","unstructured":"Bo Han, Hao Peng, Minjing Dong, Yi Ren, Yixuan Shen, and Chang Xu. 2024. AMD: Autoregressive Motion Diffusion. In AAAI Conference on Artificial Intelligence (AAAI). 2022-2030."},{"key":"e_1_3_2_1_13_1","volume-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_14_1","volume-title":"BiPO: Bidirectional Partial Occlusion Network for Text-to-Motion Synthesis. arXiv.org","author":"Hong Seong-Eun","year":"2024","unstructured":"Seong-Eun Hong, Soobin Lim, Juyeong Hwang, Minwook Chang, and Hyeongyeop Kang. 2024. BiPO: Bidirectional Partial Occlusion Network for Text-to-Motion Synthesis. arXiv.org, Vol. abs\/2412.00112 (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"BAD: Bidirectional Auto-Regressive Diffusion for Text-to-Motion Generation. In ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1-5.","author":"Hosseyni Seyed Rohollah","year":"2025","unstructured":"Seyed Rohollah Hosseyni, Ali Ahmad Rahmani, Seyed Jamal Seyedmohammadi, Sanaz Seyedin, and Arash Mohammadi. 2025. BAD: Bidirectional Auto-Regressive Diffusion for Text-to-Motion Generation. In ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1-5."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681657"},{"key":"e_1_3_2_1_17_1","first-page":"20067","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Jiang Biao","year":"2023","unstructured":"Biao Jiang, Xin Chen, Wen Liu, Jingyi Yu, Gang Yu, and Tao Chen. 2023-12-15. MotionGPT: Human Motion, as a Foreign Language, . Advances in Neural Information Processing Systems, Vol. 36 (2023-12-15), 20067-20079."},{"key":"e_1_3_2_1_18_1","first-page":"2643","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"38","author":"Jing Beibei","year":"2024","unstructured":"Beibei Jing, Youjia Zhang, Zikai Song, Junqing Yu, and Wei Yang. 2024-03-24. AMD: Anatomical Motion Diffusion, with Interpretable Motion Decomposition, and Fusion, . Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 38, 3 (2024-03-24), 2643-2651. Issue 3."},{"key":"e_1_3_2_1_19_1","first-page":"8255","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"37","author":"Kim Jihoon","year":"2023","unstructured":"Jihoon Kim, Jiseob Kim, and Sungjoon Choi. 2023-06-26. FLAME: Free-Form Language-Based Motion Synthesis, & Editing, . Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 37, 7 (2023-06-26), 8255-8263. Issue 7."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01360"},{"key":"e_1_3_2_1_21_1","volume-title":"The Thirty-eighth Annual Conference on Neural Information Processing Systems","volume":"2406","author":"Li Tianhong","year":"2024","unstructured":"Tianhong Li, Yonglong Tian, He Li, Mingyang Deng, and Kaiming He. 2024. Autoregressive Image Generation without Vector Quantization. In The Thirty-eighth Annual Conference on Neural Information Processing Systems, Vol. abs\/2406.11838."},{"key":"e_1_3_2_1_22_1","first-page":"23222","article-title":"-06. Being Comes, from Not-Being: Open-Vocabulary Text-to-Motion Generation, with Wordless Training. In 2023 IEEE, \/CVF Conference, on Computer Vision, and Pattern Recognition (CVPR, ) (Vancouver, BC, Canada)","author":"Lin Junfan","year":"2023","unstructured":"Junfan Lin, Jianlong Chang, Lingbo Liu, Guanbin Li, Liang Lin, Qi Tian, and Chang Wen Chen. 2023-06. Being Comes, from Not-Being: Open-Vocabulary Text-to-Motion Generation, with Wordless Training. In 2023 IEEE, \/CVF Conference, on Computer Vision, and Pattern Recognition (CVPR, ) (Vancouver, BC, Canada). IEEE, 23222-23231.","journal-title":"IEEE"},{"key":"e_1_3_2_1_23_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS).","author":"Lin Jing","year":"2023","unstructured":"Jing Lin, Ailing Zeng, Shunlin Lu, Yuanhao Cai, Ruimao Zhang, Haoqian Wang, and Lei Zhang. 2023. Motion-X: A Large-scale 3D Expressive Whole-body Human Motion Dataset. In Conference on Neural Information Processing Systems (NeurIPS)."},{"volume-title":"Neural Discrete Representation Learning. In Conference on Neural Information Processing Systems (NeurIPS). 6306-6315","author":"A''","key":"e_1_3_2_1_24_1","unstructured":"A'' aron van den Oord, Oriol Vinyals, and Koray Kavukcuoglu. 2017. Neural Discrete Representation Learning. In Conference on Neural Information Processing Systems (NeurIPS). 6306-6315."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2023.3309107"},{"volume-title":"Proceedings of the IEEE, \/CVF Conference, on Computer Vision, and Pattern Recognition. 10975-10985","author":"Pavlakos Georgios","key":"e_1_3_2_1_26_1","unstructured":"Georgios Pavlakos, Vasileios Choutas, Nima Ghorbani, Timo Bolkart, Ahmed A. A. Osman, Dimitrios Tzionas, and Michael J. Black. 2019. Expressive Body Capture: 3D Hands, Face, and Body From, a Single Image. In Proceedings of the IEEE, \/CVF Conference, on Computer Vision, and Pattern Recognition. 10975-10985."},{"key":"e_1_3_2_1_27_1","volume-title":"Scalable Diffusion Models with Transformers. In IEEE International Conference on Computer Vision (ICCV). 4172-4182","author":"Peebles William","year":"2023","unstructured":"William Peebles and Saining Xie. 2023. Scalable Diffusion Models with Transformers. In IEEE International Conference on Computer Vision (ICCV). 4172-4182."},{"key":"e_1_3_2_1_28_1","first-page":"10965","article-title":"-10. Action-Conditioned 3D Human Motion Synthesis, with Transformer VAE. In 2021 IEEE, \/CVF International Conference, on Computer Vision (ICCV, ) (Montreal, QC, Canada)","author":"Petrovich Mathis","year":"2021","unstructured":"Mathis Petrovich, Michael J. Black, and Gul Varol. 2021-10. Action-Conditioned 3D Human Motion Synthesis, with Transformer VAE. In 2021 IEEE, \/CVF International Conference, on Computer Vision (ICCV, ) (Montreal, QC, Canada). IEEE, 10965-10975.","journal-title":"IEEE"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00870"},{"key":"e_1_3_2_1_31_1","volume-title":"Pu Wang, Minwoo Lee, Srijan Das, and Chen Chen.","author":"Pinyoanuntapong Ekkasit","year":"2025","unstructured":"Ekkasit Pinyoanuntapong, Muhammad Usama Saleem, Pu Wang, Minwoo Lee, Srijan Das, and Chen Chen. 2025. BAMM: Bidirectional Autoregressive Motion Model, . In Computer Vision, - ECCV, 2024, Ale\u0161 Leonardis, Elisa Ricci, Stefan Roth, Olga Russakovsky, Torsten Sattler, and G\u00fcl Varol (Eds.). Vol. 15073. Springer Nature Switzerland, 172-190."},{"key":"e_1_3_2_1_32_1","volume-title":"MMM: Generative Masked Motion Model. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Pinyoanuntapong Ekkasit","year":"2024","unstructured":"Ekkasit Pinyoanuntapong, Pu Wang, Minwoo Lee, and Chen Chen. 2024. MMM: Generative Masked Motion Model. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1089\/big.2016.0028"},{"key":"e_1_3_2_1_34_1","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. In International Conference, on Machine Learning, . PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021-07-01. Learning Transferable Visual Models From Natural Language Supervision. In International Conference, on Machine Learning, . PMLR, 8748-8763."},{"key":"e_1_3_2_1_35_1","volume-title":"Hierarchical Text-Conditional Image Generation with CLIP Latents. arXiv.org","author":"Ramesh A.","year":"2022","unstructured":"A. Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. arXiv.org, Vol. abs\/2204.06125 (2022)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_37_1","volume-title":"Computer Vision, - ECCV","author":"Sampieri Alessio","year":"2024","unstructured":"Alessio Sampieri, Alessio Palma, Indro Spinelli, and Fabio Galasso. 2025. Length-Aware Motion Synthesis, via Latent Diffusion, . In Computer Vision, - ECCV, 2024, Ale\u0161 Leonardis, Elisa Ricci, Stefan Roth, Olga Russakovsky, Torsten Sattler, and G\u00fcl Varol (Eds.). Vol. 15111. Springer Nature Switzerland, 107-124."},{"key":"e_1_3_2_1_38_1","volume-title":"Denoising Diffusion Implicit Models. In International Conference on Learning Representations (ICLR).","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021. Denoising Diffusion Implicit Models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3606921"},{"volume-title":"European Conference on Computer Vision. 358-374","author":"Tevet Guy","key":"e_1_3_2_1_40_1","unstructured":"Guy Tevet, Brian Gordon, Amir Hertz, Amit H. Bermano, and D. Cohen-Or. 2022. MotionCLIP: Exposing Human Motion Generation to CLIP Space. In European Conference on Computer Vision. 358-374."},{"key":"e_1_3_2_1_41_1","volume-title":"Human Motion Diffusion Model. In The Eleventh International Conference, on Learning Representations, .","author":"Tevet Guy","year":"2022","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yoni Shafir, Daniel Cohen-or, and Amit Haim Bermano. 2022-09-29. Human Motion Diffusion Model. In The Eleventh International Conference, on Learning Representations, ."},{"key":"e_1_3_2_1_42_1","volume-title":"Computer Vision, - ECCV","author":"Wan Weilin","year":"2024","unstructured":"Weilin Wan, Zhiyang Dou, Taku Komura, Wenping Wang, Dinesh Jayaraman, and Lingjie Liu. 2025. TLControl: Trajectory, and Language Control, for Human Motion Synthesis, . In Computer Vision, - ECCV, 2024, Ale\u0161 Leonardis, Elisa Ricci, Stefan Roth, Olga Russakovsky, Torsten Sattler, and G\u00fcl Varol (Eds.). Vol. 15095. Springer Nature Switzerland, 37-54."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611887"},{"key":"e_1_3_2_1_44_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS).","author":"Yuan Weihao","year":"2024","unstructured":"Weihao Yuan, Yisheng He, Weichao Shen, Yuan Dong, Xiaodong Gu, Zilong Dong, Liefeng Bo, and Qixing Huang. 2024. MoGenTS: Motion Generation based on Spatial-Temporal Joint Modeling. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01415"},{"key":"e_1_3_2_1_46_1","volume-title":"MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model","author":"Zhang Mingyuan","year":"2024","unstructured":"Mingyuan Zhang, Zhongang Cai, Liang Pan, Fangzhou Hong, Xinying Guo, Lei Yang, and Ziwei Liu. 2024. MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model, . IEEE Transactions on Pattern Analysis and Machine Intelligence (2024), 1-15."},{"key":"e_1_3_2_1_47_1","first-page":"364","article-title":"-10-01. ReMoDiffuse: Retrieval-Augmented Motion Diffusion Model. In 2023 IEEE, \/CVF International Conference, on Computer Vision (ICCV, ) (Paris, France)","author":"Zhang Mingyuan","year":"2023","unstructured":"Mingyuan Zhang, Xinying Guo, Liang Pan, Zhongang Cai, Fangzhou Hong, Huirong Li, Lei Yang, and Ziwei Liu. 2023-10-01. ReMoDiffuse: Retrieval-Augmented Motion Diffusion Model. In 2023 IEEE, \/CVF International Conference, on Computer Vision (ICCV, ) (Paris, France). IEEE, 364-373.","journal-title":"IEEE"},{"key":"e_1_3_2_1_48_1","first-page":"7368","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"38","author":"Zhang Yaqi","year":"2024","unstructured":"Yaqi Zhang, Di Huang, Bin Liu, Shixiang Tang, Yan Lu, Lu Chen, Lei Bai, Qi Chu, Nenghai Yu, and Wanli Ouyang. 2024-03-24. MotionGPT: Finetuned LLMs Are General-Purpose Motion Generators, . Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 38, 7 (2024-03-24), 7368-7376."},{"key":"e_1_3_2_1_49_1","volume-title":"Motion-X: A Large-Scale Multimodal 3D Whole-body Human Motion Dataset. arXiv preprint arXiv:2501.05098","author":"Zhang Yuhong","year":"2025","unstructured":"Yuhong Zhang, Jing Lin, Ailing Zeng, Guanlin Wu, Shunlin Lu, Yurong Fu, Yuanhao Cai, Ruimao Zhang, Haoqian Wang, and Lei Zhang. 2025. Motion-X: A Large-Scale Multimodal 3D Whole-body Human Motion Dataset. arXiv preprint arXiv:2501.05098 (2025)."},{"key":"e_1_3_2_1_50_1","first-page":"509","volume-title":"2023 IEEE\/CVF International Conference, on Computer Vision (ICCV, )","author":"Zhong Chongyang","year":"2023","unstructured":"Chongyang Zhong, Lei Hu, Zihao Zhang, and Shihong Xia. 2023-10-01. AttT2M: Text-Driven Human Motion Generation, with Multi-Perspective Attention Mechanism. In 2023 IEEE\/CVF International Conference, on Computer Vision (ICCV, ) (Paris, France). IEEE, 509-519."},{"key":"e_1_3_2_1_51_1","volume-title":"EMDM: Efficient Motion Diffusion Model, for Fast, and High-Quality Motion Generation, . In Computer Vision - ECCV","author":"Zhou Wenyang","year":"2025","unstructured":"Wenyang Zhou, Zhiyang Dou, Zeyu Cao, Zhouyingcheng Liao, Jingbo Wang, Wenjia Wang, Yuan Liu, Taku Komura, Wenping Wang, and Lingjie Liu. 2025. EMDM: Efficient Motion Diffusion Model, for Fast, and High-Quality Motion Generation, . In Computer Vision - ECCV, 2024, Ale\u0161 Leonardis, Elisa Ricci, Stefan Roth, Olga Russakovsky, Torsten Sattler, and G\u00fcl Varol (Eds.). Vol. 15060. Springer Nature Switzerland, 18-38."},{"key":"e_1_3_2_1_52_1","first-page":"1357","article-title":"AvatarGPT: All-in-One Framework for Motion Understanding","author":"Zhou Zixiang","year":"2024","unstructured":"Zixiang Zhou, Yu Wan, and Baoyuan Wang. 2024. AvatarGPT: All-in-One Framework for Motion Understanding, Planning, Generation and Beyond. In Computer Vision and Pattern Recognition (CVPR). 1357-1366.","journal-title":"Planning, Generation and Beyond. In Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_53_1","volume-title":"ParCo: Part-Coordinating Text-to-Motion Synthesis. In European Conference on Computer Vision","volume":"2403","author":"Zou Qiran","year":"2024","unstructured":"Qiran Zou, Shangyuan Yuan, Shian Du, Yu Wang, Chang Liu, Yi Xu, Jie Chen, and Xiangyang Ji. 2024. ParCo: Part-Coordinating Text-to-Motion Synthesis. In European Conference on Computer Vision, Vol. abs\/2403.18512."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754748","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:06:04Z","timestamp":1765339564000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754748"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":53,"alternative-id":["10.1145\/3746027.3754748","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754748","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}