{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:47:39Z","timestamp":1777657659430,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Natural Science Foundation of Guangdong Province China (General Program)","award":["No.2022A1515011524"],"award-info":[{"award-number":["No.2022A1515011524"]}]},{"name":"Shenzhen Science and Technology Program","award":["JCYJ20220818103001002 and ZDSYS20211021111415025"],"award-info":[{"award-number":["JCYJ20220818103001002 and ZDSYS20211021111415025"]}]},{"name":"Young Scientists Fund of the National Natural Science Foundation of China","award":["No. 62106154"],"award-info":[{"award-number":["No. 62106154"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612046","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"8504-8514","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["Dance with You: The Diversity Controllable Dancer Generation via Diffusion Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-9963-9485","authenticated-orcid":false,"given":"Siyue","family":"Yao","sequence":"first","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3697-7927","authenticated-orcid":false,"given":"Mingjie","family":"Sun","sequence":"additional","affiliation":[{"name":"Soochow University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0774-9308","authenticated-orcid":false,"given":"Bingliang","family":"Li","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5410-4707","authenticated-orcid":false,"given":"Fengyu","family":"Yang","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9096-2670","authenticated-orcid":false,"given":"Junle","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9511-7532","authenticated-orcid":false,"given":"Ruimao","family":"Zhang","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Esteve Valls Mascaro, and Dongheui Lee","author":"Ahn Hyemin","year":"2023","unstructured":"Hyemin Ahn, Esteve Valls Mascaro, and Dongheui Lee. 2023. Can We Use Diffusion Probabilistic Models for 3D Motion Prediction? arXiv preprint arXiv:2302.14503 2023)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/566654.566606"},{"key":"e_1_3_2_2_3_1","volume-title":"BeLFusion: Latent Diffusion for Behavior-Driven Human Motion Prediction. arXiv preprint arXiv:2211.14304","author":"Barquero German","year":"2022","unstructured":"German Barquero, Sergio Escalera, and Cristina Palmero. 2022. BeLFusion: Latent Diffusion for Behavior-Driven Human Motion Prediction. arXiv preprint arXiv:2211.14304 (2022)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2071423.2071477"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3450626.3459932","article-title":"Choreomaster: choreography-oriented music-driven dance synthesis","volume":"40","author":"Chen Kang","year":"2021","unstructured":"Kang Chen, Zhipeng Tan, Jin Lei, Song-Hai Zhang, Yuan-Chen Guo, Weidong Zhang, and Shi-Min Hu. 2021. Choreomaster: choreography-oriented music-driven dance synthesis. ACM Transactions on Graphics, Vol. 40, 4 (2021), 1--13.","journal-title":"ACM Transactions on Graphics"},{"key":"e_1_3_2_2_6_1","unstructured":"Kai Chen Jiaqi Wang Jiangmiao Pang Yuhang Cao Yu Xiong Xiaoxiao Li Shuyang Sun Wansen Feng Ziwei Liu Jiarui Xu et al. 2019. MMDetection: Open mmlab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155 (2019)."},{"key":"e_1_3_2_2_7_1","volume-title":"HumanMAC: Masked Motion Completion for Human Motion Prediction. arXiv preprint arXiv:2302.03665","author":"Chen Ling-Hao","year":"2023","unstructured":"Ling-Hao Chen, Jiawei Zhang, Yewen Li, Yiren Pang, Xiaobo Xia, and Tongliang Liu. 2023. HumanMAC: Masked Motion Completion for Human Motion Prediction. arXiv preprint arXiv:2302.03665 (2023)."},{"key":"e_1_3_2_2_8_1","volume-title":"Jingwan Lu, David I Inouye, and Ajinkya Kale.","author":"Cho Wonwoong","year":"2023","unstructured":"Wonwoong Cho, Hareesh Ravi, Midhun Harikumar, Vinh Khuc, Krishna Kumar Singh, Jingwan Lu, David I Inouye, and Ajinkya Kale. 2023. Towards Enhanced Controllability of Diffusion Models. arXiv preprint arXiv:2302.14368 (2023)."},{"key":"e_1_3_2_2_9_1","unstructured":"MMPose Contributors. 2020. OpenMMLab Pose Estimation Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmpose."},{"key":"e_1_3_2_2_10_1","volume-title":"Generative choreography using deep learning. arXiv preprint arXiv:1605.06921","author":"Crnkovic-Friis Luka","year":"2016","unstructured":"Luka Crnkovic-Friis and Louise Crnkovic-Friis. 2016. Generative choreography using deep learning. arXiv preprint arXiv:1605.06921 (2016)."},{"key":"e_1_3_2_2_11_1","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. In Advances in Neural Information Processing Systems, Vol. 34. 8780--8794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"crossref","first-page":"501","DOI":"10.1109\/TVCG.2011.73","article-title":"Example-based automatic music-driven conventional dance motion synthesis","volume":"18","author":"Fan Rukun","year":"2011","unstructured":"Rukun Fan, Songhua Xu, and Weidong Geng. 2011. Example-based automatic music-driven conventional dance motion synthesis. IEEE Transactions on Visualization and Computer Graphics, Vol. 18, 3 (2011), 501--515.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"e_1_3_2_2_13_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. In Advances in Neural Information Processing Systems, Vol. 30."},{"key":"e_1_3_2_2_14_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. In Advances in Neural Information Processing Systems, Vol. 33. 6840--6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_15_1","volume-title":"Classifier-Free Diffusion Guidance. In NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications.","author":"Ho Jonathan","year":"2021","unstructured":"Jonathan Ho and Tim Salimans. 2021. Classifier-Free Diffusion Guidance. In NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications."},{"key":"e_1_3_2_2_16_1","volume-title":"International Conference on Learning Representations.","author":"Huang Ruozi","year":"2020","unstructured":"Ruozi Huang, Huang Hu, Wei Wu, Kei Sawada, Mi Zhang, and Daxin Jiang. 2020. Dance Revolution: Long-term dance generation with music via curriculum learning. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_17_1","volume-title":"FLAME: Free-form language-based motion synthesis & editing. arXiv preprint arXiv:2209.00349","author":"Kim Jihoon","year":"2022","unstructured":"Jihoon Kim, Jiseob Kim, and Sungjoon Choi. 2022a. FLAME: Free-form language-based motion synthesis & editing. arXiv preprint arXiv:2209.00349 (2022)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00348"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/882262.882283"},{"key":"e_1_3_2_2_20_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Kingma Diederik P","year":"2015","unstructured":"Diederik P Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_2_21_1","volume-title":"Music-Driven Group Choreography. arXiv preprint arXiv:2303.12337","author":"Le Nhat","year":"2023","unstructured":"Nhat Le, Thang Pham, Tuong Do, Erman Tjiputra, Quang D Tran, and Anh Nguyen. 2023. Music-Driven Group Choreography. arXiv preprint arXiv:2303.12337 (2023)."},{"key":"e_1_3_2_2_22_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Lee Hsin-Ying","year":"2019","unstructured":"Hsin-Ying Lee, Xiaodong Yang, Ming-Yu Liu, Ting-Chun Wang, Yu-Ding Lu, Ming-Hsuan Yang, and Jan Kautz. 2019. Dancing to music. In Advances in Neural Information Processing Systems, Vol. 32."},{"key":"e_1_3_2_2_23_1","volume-title":"Music similarity-based approach to generating dance motion sequence. Multimedia tools and applications","author":"Lee Minho","year":"2013","unstructured":"Minho Lee, Kyogu Lee, and Jaeheung Park. 2013. Music similarity-based approach to generating dance motion sequence. Multimedia tools and applications, Vol. 62 (2013), 895--912."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20014"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01315"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_26"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818013"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"e_1_3_2_2_30_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_2_31_1","volume-title":"U-net: Convolutional networks for biomedical image segmentation. In Medical Image Computing and Computer-Assisted Intervention","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015. U-net: Convolutional networks for biomedical image segmentation. In Medical Image Computing and Computer-Assisted Intervention. Springer, 234--241."},{"key":"e_1_3_2_2_32_1","volume-title":"An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747","author":"Ruder Sebastian","year":"2016","unstructured":"Sebastian Ruder. 2016. An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747 (2016)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.48"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.471"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01077"},{"key":"e_1_3_2_2_36_1","volume-title":"International Conference on Machine Learning. PMLR, 2256--2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International Conference on Machine Learning. PMLR, 2256--2265."},{"key":"e_1_3_2_2_37_1","volume-title":"Denoising Diffusion Implicit Models. In International Conference on Learning Representations.","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising Diffusion Implicit Models. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_38_1","first-page":"12438","article-title":"Improved techniques for training score-based generative models","volume":"33","author":"Song Yang","year":"2020","unstructured":"Yang Song and Stefano Ermon. 2020. Improved techniques for training score-based generative models. In Advances in Neural Information Processing Systems, Vol. 33. 12438--12448.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240526"},{"key":"e_1_3_2_2_40_1","volume-title":"Human motion diffusion model. arXiv preprint arXiv:2209.14916","author":"Tevet Guy","year":"2022","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yonatan Shafir, Daniel Cohen-Or, and Amit H Bermano. 2022. Human motion diffusion model. arXiv preprint arXiv:2209.14916 (2022)."},{"key":"e_1_3_2_2_41_1","first-page":"6","article-title":"AIST Dance Video Database: Multi-Genre, Multi-Dancer, and Multi-Camera Database for Dance Information Processing","volume":"1","author":"Tsuchida Shuhei","year":"2019","unstructured":"Shuhei Tsuchida, Satoru Fukayama, Masahiro Hamasaki, and Masataka Goto. 2019. AIST Dance Video Database: Multi-Genre, Multi-Dancer, and Multi-Camera Database for Dance Information Processing.. In ISMIR, Vol. 1. 6.","journal-title":"ISMIR"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548090"},{"key":"e_1_3_2_2_43_1","volume-title":"Videogpt: Video generation using vq-vae and transformers. arXiv preprint arXiv:2104.10157","author":"Yan Wilson","year":"2021","unstructured":"Wilson Yan, Yunzhi Zhang, Pieter Abbeel, and Aravind Srinivas. 2021. Videogpt: Video generation using vq-vae and transformers. arXiv preprint arXiv:2104.10157 (2021)."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414005"},{"key":"e_1_3_2_2_45_1","volume-title":"SmoothNet: A Plug-and-Play Network for Refining Human Poses in Videos. In European Conference on Computer Vision. Springer.","author":"Zeng Ailing","year":"2022","unstructured":"Ailing Zeng, Lei Yang, Xuan Ju, Jiefeng Li, Jianyi Wang, and Qiang Xu. 2022. SmoothNet: A Plug-and-Play Network for Refining Human Poses in Videos. In European Conference on Computer Vision. Springer."},{"key":"e_1_3_2_2_46_1","volume-title":"Motiondiffuse: Text-driven human motion generation with diffusion model. arXiv preprint arXiv:2208.15001","author":"Zhang Mingyuan","year":"2022","unstructured":"Mingyuan Zhang, Zhongang Cai, Liang Pan, Fangzhou Hong, Xinying Guo, Lei Yang, and Ziwei Liu. 2022. Motiondiffuse: Text-driven human motion generation with diffusion model. arXiv preprint arXiv:2208.15001 (2022)."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485664"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612046","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612046","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:08:25Z","timestamp":1755821305000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612046"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":47,"alternative-id":["10.1145\/3581783.3612046","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612046","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}