{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:05:32Z","timestamp":1742911532332,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":52,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819609161"},{"type":"electronic","value":"9789819609178"}],"license":[{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0917-8_14","type":"book-chapter","created":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T07:57:36Z","timestamp":1733558256000},"page":"239-255","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Rethinking Sampling for\u00a0Music-Driven Long-Term Dance Generation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2541-576X","authenticated-orcid":false,"given":"Tuong-Vy","family":"Truong-Thuy","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9936-3814","authenticated-orcid":false,"given":"Gia-Cat","family":"Bui-Le","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0888-8908","authenticated-orcid":false,"given":"Hai-Dang","family":"Nguyen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7363-2610","authenticated-orcid":false,"given":"Trung-Nghia","family":"Le","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,8]]},"reference":[{"issue":"3","key":"14_CR1","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1145\/566654.566606","volume":"21","author":"O Arikan","year":"2002","unstructured":"Arikan, O., Forsyth, D.A.: Interactive motion generation from examples. ACM Transactions on Graphics (TOG) 21(3), 483\u2013490 (2002)","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Chen, X., Jiang, B., Liu, W., Huang, Z., Fu, B., Chen, T., Yu, G.: Executing your commands via motion diffusion in latent space. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 18000\u201318010 (2023)","DOI":"10.1109\/CVPR52729.2023.01726"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Dabral, R., Mughal, M.H., Golyanik, V., Theobalt, C.: Mofusion: A framework for denoising-diffusion-based motion synthesis. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 9760\u20139770 (2023)","DOI":"10.1109\/CVPR52729.2023.00941"},{"key":"14_CR4","unstructured":"Dhariwal, P., Jun, H., Payne, C., Kim, J.W., Radford, A., Sutskever, I.: Jukebox: A generative model for music. arXiv preprint arXiv:2005.00341 (2020)"},{"issue":"3","key":"14_CR5","doi-asserted-by":"crossref","first-page":"501","DOI":"10.1109\/TVCG.2011.73","volume":"18","author":"R Fan","year":"2011","unstructured":"Fan, R., Xu, S., Geng, W.: Example-based automatic music-driven conventional dance motion synthesis. IEEE Trans. Visual Comput. Graphics 18(3), 501\u2013515 (2011)","journal-title":"IEEE Trans. Visual Comput. Graphics"},{"key":"14_CR6","unstructured":"Gopinath, D., Won, J.: Fairmotion-tools to load, process and visualize motion capture data (2020)"},{"key":"14_CR7","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems 30 (2017)"},{"key":"14_CR8","unstructured":"Ho, J., Chan, W., Saharia, C., Whang, J., Gao, R., Gritsenko, A., Kingma, D.P., Poole, B., Norouzi, M., Fleet, D.J., et\u00a0al.: Imagen video: High definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)"},{"key":"14_CR9","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR10","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"issue":"4","key":"14_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2897824.2925975","volume":"35","author":"D Holden","year":"2016","unstructured":"Holden, D., Saito, J., Komura, T.: A deep learning framework for character motion synthesis and editing. ACM Transactions on Graphics (TOG) 35(4), 1\u201311 (2016)","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"14_CR12","unstructured":"Huang, R., Hu, H., Wu, W., Sawada, K., Zhang, M., Jiang, D.: Dance revolution: Long-term dance generation with music via curriculum learning. arXiv preprint arXiv:2006.06119 (2020)"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Huang, S., Wang, Z., Li, P., Jia, B., Liu, T., Zhu, Y., Liang, W., Zhu, S.C.: Diffusion-based generation, optimization, and planning in 3d scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 16750\u201316761 (2023)","DOI":"10.1109\/CVPR52729.2023.01607"},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"Kaufmann, M., Aksan, E., Song, J., Pece, F., Ziegler, R., Hilliges, O.: Convolutional autoencoders for human motion infilling. In: 2020 International Conference on 3D Vision (3DV). pp. 918\u2013927. IEEE (2020)","DOI":"10.1109\/3DV50981.2020.00102"},{"key":"14_CR15","doi-asserted-by":"crossref","unstructured":"Kim, J., Kim, J., Choi, S.: Flame: Free-form language-based motion synthesis & editing. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a037, pp. 8255\u20138263 (2023)","DOI":"10.1609\/aaai.v37i7.25996"},{"key":"14_CR16","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"14_CR17","unstructured":"Kong, Z., Ping, W., Huang, J., Zhao, K., Catanzaro, B.: Diffwave: A versatile diffusion model for audio synthesis. arXiv preprint arXiv:2009.09761 (2020)"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Kovar, L., Gleicher, M., Pighin, F.: Motion graphs. In: Seminal Graphics Papers: Pushing the Boundaries, Volume 2, pp. 723\u2013732 (2023)","DOI":"10.1145\/3596711.3596788"},{"key":"14_CR19","doi-asserted-by":"publisher","first-page":"44982","DOI":"10.1109\/ACCESS.2022.3169782","volume":"10","author":"K Kritsis","year":"2022","unstructured":"Kritsis, K., Gkiokas, A., Pikrakis, A., Katsouros, V.: Danceconv: Dance motion generation with convolutional networks. IEEE Access 10, 44982\u201345000 (2022)","journal-title":"IEEE Access"},{"key":"14_CR20","unstructured":"Lee, H.Y., Yang, X., Liu, M.Y., Wang, T.C., Lu, Y.D., Yang, M.H., Kautz, J.: Dancing to music. Advances in neural information processing systems 32 (2019)"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Lee, T., Moon, G., Lee, K.M.: Multiact: Long-term 3d human motion generation from multiple action labels. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a037, pp. 1231\u20131239 (2023)","DOI":"10.1609\/aaai.v37i1.25206"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Li, B., Zhao, Y., Zhelun, S., Sheng, L.: Danceformer: Music conditioned 3d dance generation with parametric motion transformer. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a036, pp. 1272\u20131279 (2022)","DOI":"10.1609\/aaai.v36i2.20014"},{"key":"14_CR23","unstructured":"Li, J., Yin, Y., Chu, H., Zhou, Y., Wang, T., Fidler, S., Li, H.: Learning to generate diverse dance motions with transformer. arXiv preprint arXiv:2008.08171 (2020)"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Li, R., Yang, S., Ross, D.A., Kanazawa, A.: Ai choreographer: Music conditioned 3d dance generation with aist++. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 13401\u201313412 (2021)","DOI":"10.1109\/ICCV48922.2021.01315"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: Smpl: A skinned multi-person linear model. In: Seminal Graphics Papers: Pushing the Boundaries, Volume 2, pp. 851\u2013866 (2023)","DOI":"10.1145\/3596711.3596800"},{"key":"14_CR26","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Lugmayr, A., Danelljan, M., Romero, A., Yu, F., Timofte, R., Van\u00a0Gool, L.: Repaint: Inpainting using denoising diffusion probabilistic models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 11461\u201311471 (2022)","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"14_CR28","doi-asserted-by":"crossref","unstructured":"Muller, M., Kurth, F., Clausen, M.: Chroma-based statistical audio features for audio matching. In: IEEE Workshop on Applications of Signal Processing to Audio and Acoustics, 2005. pp. 275\u2013278. IEEE (2005)","DOI":"10.1109\/ASPAA.2005.1540223"},{"issue":"3","key":"14_CR29","doi-asserted-by":"publisher","first-page":"747","DOI":"10.1109\/TMM.2011.2181492","volume":"14","author":"F Ofli","year":"2011","unstructured":"Ofli, F., Erzin, E., Yemez, Y., Tekalp, A.M.: Learn2dance: Learning statistical music-to-dance mappings for choreography synthesis. IEEE Trans. Multimedia 14(3), 747\u2013759 (2011)","journal-title":"IEEE Trans. Multimedia"},{"key":"14_CR30","unstructured":"Onuma, K., Faloutsos, C., Hodgins, J.K.: Fmdistance: A fast and effective distance function for motion capture data. Eurographics (Short Papers) 7 (2008)"},{"key":"14_CR31","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M.Y., Wang, T.C., Zhu, J.Y.: Semantic image synthesis with spatially-adaptive normalization. In: Proceedings of conference on computer vision and pattern recognition. pp. 2337\u20132346 (2019)","DOI":"10.1109\/CVPR.2019.00244"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Perez, E., Strub, F., De\u00a0Vries, H., Dumoulin, V., Courville, A.: Film: Visual reasoning with a general conditioning layer. In: Proceedings of the AAAI conference on artificial intelligence. vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"14_CR33","doi-asserted-by":"crossref","unstructured":"Petrovich, M., Black, M.J., Varol, G.: Action-conditioned 3d human motion synthesis with transformer vae. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 10985\u201310995 (2021)","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"14_CR34","unstructured":"Raab, S., Leibovitch, I., Tevet, G., Arar, M., Bermano, A.H., Cohen-Or, D.: Single motion diffusion. arXiv preprint arXiv:2302.05905 (2023)"},{"key":"14_CR35","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"14_CR36","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"14_CR37","doi-asserted-by":"crossref","unstructured":"Shiratori, T., Nakazawa, A., Ikeuchi, K.: Dancing-to-music character animation. In: Computer Graphics Forum. vol.\u00a025, pp. 449\u2013458. Wiley Online Library (2006)","DOI":"10.1111\/j.1467-8659.2006.00964.x"},{"key":"14_CR38","unstructured":"Singer, U., Polyak, A., Hayes, T., Yin, X., An, J., Zhang, S., Hu, Q., Yang, H., Ashual, O., Gafni, O., et\u00a0al.: Make-a-video: Text-to-video generation without text-video data. arXiv preprint arXiv:2209.14792 (2022)"},{"key":"14_CR39","doi-asserted-by":"crossref","unstructured":"Siyao, L., Yu, W., Gu, T., Lin, C., Wang, Q., Qian, C., Loy, C.C., Liu, Z.: Bailando: 3d dance generation by actor-critic gpt with choreographic memory. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 11050\u201311059 (2022)","DOI":"10.1109\/CVPR52688.2022.01077"},{"key":"14_CR40","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International conference on machine learning. pp. 2256\u20132265. PMLR (2015)"},{"key":"14_CR41","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1109\/TMM.2020.2981989","volume":"23","author":"G Sun","year":"2020","unstructured":"Sun, G., Wong, Y., Cheng, Z., Kankanhalli, M.S., Geng, W., Li, X.: Deepdance: music-to-dance motion choreography with adversarial learning. IEEE Trans. Multimedia 23, 497\u2013509 (2020)","journal-title":"IEEE Trans. Multimedia"},{"key":"14_CR42","first-page":"9995","volume":"35","author":"J Sun","year":"2022","unstructured":"Sun, J., Wang, C., Hu, H., Lai, H., Jin, Z., Hu, J.F.: You never stop dancing: Non-freezing dance generation via bank-constrained manifold projection. Adv. Neural. Inf. Process. Syst. 35, 9995\u201310007 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR43","doi-asserted-by":"crossref","unstructured":"Tang, T., Jia, J., Mao, H.: Dance with melody: An lstm-autoencoder approach to music-oriented dance synthesis. In: Proceedings of the 26th ACM international conference on Multimedia. pp. 1598\u20131606 (2018)","DOI":"10.1145\/3240508.3240526"},{"key":"14_CR44","unstructured":"Tevet, G., Raab, S., Gordon, B., Shafir, Y., Cohen-Or, D., Bermano, A.H.: Human motion diffusion model. arXiv preprint arXiv:2209.14916 (2022)"},{"key":"14_CR45","doi-asserted-by":"crossref","unstructured":"Tseng, J., Castellon, R., Liu, K.: Edge: Editable dance generation from music. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 448\u2013458 (2023)","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"14_CR46","unstructured":"Van Den\u00a0Oord, A., Vinyals, O., et\u00a0al.: Neural discrete representation learning. Advances in neural information processing systems 30 (2017)"},{"key":"14_CR47","doi-asserted-by":"crossref","unstructured":"Wang, J., Yan, S., Dai, B., Lin, D.: Scene-aware generative network for human motion synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 12206\u201312215 (2021)","DOI":"10.1109\/CVPR46437.2021.01203"},{"key":"14_CR48","unstructured":"Yang, S., Yang, Z., Wang, Z.: Longdancediff: Long-term dance generation with conditional diffusion model. arXiv preprint arXiv:2308.11945 (2023)"},{"key":"14_CR49","doi-asserted-by":"crossref","unstructured":"Yang, Z., Su, B., Wen, J.R.: Synthesizing long-term human motions with diffusion models via coherent sampling. In: Proceedings of the 31st ACM International Conference on Multimedia. pp. 3954\u20133964 (2023)","DOI":"10.1145\/3581783.3611887"},{"key":"14_CR50","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Barnes, C., Lu, J., Yang, J., Li, H.: On the continuity of rotation representations in neural networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 5745\u20135753 (2019)","DOI":"10.1109\/CVPR.2019.00589"},{"key":"14_CR51","doi-asserted-by":"crossref","unstructured":"Zhuang, H., Lei, S., Xiao, L., Li, W., Chen, L., Yang, S., Wu, Z., Kang, S., Meng, H.: Gtn-bailando: Genre consistent long-term 3d dance generation based on pre-trained genre token network. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). pp.\u00a01\u20135. IEEE (2023)","DOI":"10.1109\/ICASSP49357.2023.10095203"},{"key":"14_CR52","doi-asserted-by":"crossref","unstructured":"Zhuang, W., Wang, C., Chai, J., Wang, Y., Shao, M., Xia, S.: Music2dance: Dancenet for music-driven dance generation. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM) 18(2), 1\u201321 (2022)","DOI":"10.1145\/3485664"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0917-8_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T08:26:17Z","timestamp":1733559977000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0917-8_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,8]]},"ISBN":["9789819609161","9789819609178"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0917-8_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,8]]},"assertion":[{"value":"8 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}