{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T18:22:15Z","timestamp":1776968535235,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3733385","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:31:04Z","timestamp":1750876264000},"page":"671-679","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["MeloDance: Dance Generation Guided by Music Structure and Emotion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2561-5909","authenticated-orcid":false,"given":"Yixuan","family":"Li","sequence":"first","affiliation":[{"name":"Netease Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5144-5438","authenticated-orcid":false,"given":"Qiang","family":"Jin","sequence":"additional","affiliation":[{"name":"Netease Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8042-4799","authenticated-orcid":false,"given":"Huaping","family":"Liu","sequence":"additional","affiliation":[{"name":"Netease Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2366-8113","authenticated-orcid":false,"given":"Jinhai","family":"Chen","sequence":"additional","affiliation":[{"name":"Netease Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0785-6147","authenticated-orcid":false,"given":"Xiangyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"Netease Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7266-2743","authenticated-orcid":false,"given":"Peng","family":"Li","sequence":"additional","affiliation":[{"name":"Netease Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460608"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592458"},{"key":"e_1_3_2_1_3_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv","author":"Alexey Dosovitskiy","year":"2010","unstructured":"Dosovitskiy Alexey. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv: 2010.11929 (2020)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547797"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00191"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3450626.3459932","article-title":"Choreomaster: choreography-oriented music-driven dance synthesis","volume":"40","author":"Chen Kang","year":"2021","unstructured":"Kang Chen, Zhipeng Tan, Jin Lei, Song-Hai Zhang, Yuan-Chen Guo, Weidong Zhang, and Shi-Min Hu. 2021. Choreomaster: choreography-oriented music-driven dance synthesis. ACM Transactions on Graphics (TOG), Vol. 40, 4 (2021), 1--13.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00941"},{"key":"e_1_3_2_1_8_1","volume-title":"Alec Radford, and Ilya Sutskever.","author":"Dhariwal Prafulla","year":"2020","unstructured":"Prafulla Dhariwal, Heewoo Jun, Christine Payne, Jong Wook Kim, Alec Radford, and Ilya Sutskever. 2020. Jukebox: A generative model for music. arXiv preprint arXiv:2005.00341 (2020)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095889"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2022.108310"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413635"},{"key":"e_1_3_2_1_12_1","volume-title":"Dance revolution: Long-term dance generation with music via curriculum learning. arXiv preprint arXiv:2006.06119","author":"Huang Ruozi","year":"2020","unstructured":"Ruozi Huang, Huang Hu, Wei Wu, Kei Sawada, Mi Zhang, and Daxin Jiang. 2020. Dance revolution: Long-term dance generation with music via curriculum learning. arXiv preprint arXiv:2006.06119 (2020)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00348"},{"key":"e_1_3_2_1_14_1","volume-title":"All-In-One Metrical And Functional Structure Analysis With Neighborhood Attentions on Demixed Audio. In IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA).","author":"Kim Taejun","year":"2023","unstructured":"Taejun Kim and Juhan Nam. 2023. All-In-One Metrical And Functional Structure Analysis With Neighborhood Attentions on Demixed Audio. In IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)."},{"key":"e_1_3_2_1_15_1","volume-title":"AdsorbDiff: Adsorbate Placement via Conditional Denoising Diffusion. arXiv preprint arXiv:2405.03962","author":"Kolluru Adeesh","year":"2024","unstructured":"Adeesh Kolluru and John R Kitchin. 2024. AdsorbDiff: Adsorbate Placement via Conditional Denoising Diffusion. arXiv preprint arXiv:2405.03962 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3169782"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018553"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01315"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00151"},{"key":"e_1_3_2_1_20_1","volume-title":"Magic: multi art genre intelligent choreography dataset and network for 3d dance generation. arXiv preprint arXiv:2212.03741","author":"Li Ronghui","year":"2022","unstructured":"Ronghui Li, J Zhao, Y Zhang, M Su, Z Ren, H Zhang, and X Li. 2022. Magic: multi art genre intelligent choreography dataset and network for 3d dance generation. arXiv preprint arXiv:2212.03741 (2022)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00939"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3596711.3596800"},{"key":"e_1_3_2_1_23_1","volume-title":"Humantomato: Text-aligned whole-body motion generation. arXiv preprint arXiv:2310.12978","author":"Lu Shunlin","year":"2023","unstructured":"Shunlin Lu, Ling-Hao Chen, Ailing Zeng, Jing Lin, Ruimao Zhang, Lei Zhang, and Heung-Yeung Shum. 2023. Humantomato: Text-aligned whole-body motion generation. arXiv preprint arXiv:2310.12978 (2023)."},{"key":"e_1_3_2_1_24_1","volume-title":"A simple early exiting framework for accelerated sampling in diffusion models. arXiv preprint arXiv:2408.05927","author":"Moon Taehong","year":"2024","unstructured":"Taehong Moon, Moonseok Choi, EungGu Yun, Jongmin Yoon, Gayoung Lee, Jaewoong Cho, and Juho Lee. 2024. A simple early exiting framework for accelerated sampling in diffusion models. arXiv preprint arXiv:2408.05927 (2024)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612307"},{"key":"e_1_3_2_1_29_1","volume-title":"A Circumplex Model of Affect Journal of Personality and Social Psychology 39. \u00cd6I-I78","author":"Russell JA","year":"1980","unstructured":"JA Russell. 1980. A Circumplex Model of Affect Journal of Personality and Social Psychology 39. \u00cd6I-I78 (1980)."},{"key":"e_1_3_2_1_30_1","volume-title":"Burcu Karagol Ayan, Tim Salimans, et al.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in neural information processing systems, Vol. 35 (2022), 36479--36494."},{"key":"e_1_3_2_1_31_1","volume-title":"Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418","author":"Shafir Yonatan","year":"2023","unstructured":"Yonatan Shafir, Guy Tevet, Roy Kapon, and Amit H Bermano. 2023. Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418 (2023)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01077"},{"key":"e_1_3_2_1_33_1","volume-title":"Bermano","author":"Tevet Guy","year":"2022","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yonatan Shafir, Daniel Cohen-Or, and Amit H. Bermano. 2022. Human Motion Diffusion Model. arxiv: 2209.14916 [cs.CV] https:\/\/arxiv.org\/abs\/2209.14916"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.62617\/mcb.v21i2.401"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"e_1_3_2_1_36_1","volume-title":"Attention is all you need. Advances in Neural Information Processing Systems","author":"Vaswani A","year":"2017","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3652583.3657998"},{"key":"e_1_3_2_1_38_1","volume-title":"Longdancediff: Long-term dance generation with conditional diffusion model. arXiv preprint arXiv:2308.11945","author":"Yang Siqi","year":"2023","unstructured":"Siqi Yang, Zejun Yang, and Zhisheng Wang. 2023. Longdancediff: Long-term dance generation with conditional diffusion model. arXiv preprint arXiv:2308.11945 (2023)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00503"},{"key":"e_1_3_2_1_40_1","volume-title":"Motiondiffuse: Text-driven human motion generation with diffusion model","author":"Zhang Mingyuan","year":"2024","unstructured":"Mingyuan Zhang, Zhongang Cai, Liang Pan, Fangzhou Hong, Xinying Guo, Lei Yang, and Ziwei Liu. 2024. Motiondiffuse: Text-driven human motion generation with diffusion model. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3512527.3531430"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00545"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485664"}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","location":"Chicago IL USA","acronym":"ICMR '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3733385","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:08:23Z","timestamp":1755749303000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3733385"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":43,"alternative-id":["10.1145\/3731715.3733385","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3733385","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}